# Notebook setup cell: imports, IPython extensions and module-level helper objects.
from __future__ import print_function
import os.path
import dalmatian as dm
import pandas as pd
import sys
# Put the directory two levels up at the front of the import path
# (presumably where the JKBio package lives -- TODO confirm).
sys.path.insert(0, '../../')
#import Datanalytics as da
from JKBio import TerraFunction as terra
# IPython autoreload, mode 2: modules are reloaded before each cell executes,
# so edits to local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
from JKBio import Helper as h
import pickle
from taigapy import TaigaClient
# Module-level Taiga client, bound to `tc`.
tc = TaigaClient()
import numpy as np
import itertools
# NOTE(review): star import; output_notebook() below is presumably supplied
# by it, since no other visible import provides that name.
from bokeh.plotting import *
from bokeh.models import HoverTool
# Send bokeh plots to the notebook output instead of a standalone file.
output_notebook()
import matplotlib.pyplot as plt
# Load the rpy2 IPython extension (R magics).
%load_ext rpy2.ipython
import seaborn as sns
import gseapy
# NOTE(review): duplicate of the matplotlib.pyplot import a few lines above; harmless.
import matplotlib.pyplot as plt
import networkx as nx
# NOTE(review): this imports from `JKBio.helper` (lowercase) while the earlier
# import uses `JKBio.Helper` -- confirm both module names actually exist.
from JKBio.helper import pyDESeq2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import AgglomerativeClustering, DBSCAN
from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
# IPython shell escape: move every object in gs://transfer-amlproject/ whose
# name matches *MP7624* under the RNPv2/ prefix of the same bucket.
# The captured output below shows each object being copied then removed, and
# gsutil's own NOTE there suggests the -m option for this kind of operation.
! gsutil mv gs://transfer-amlproject/*MP7624* gs://transfer-amlproject/RNPv2/
Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz... ==> NOTE: You are performing a sequence of gsutil operations that may run significantly faster if you instead use gsutil -m cp ... Please see the -m section under "gsutil help options" for further information about when gsutil -m can be advantageous. Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz... 
Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz... Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz... Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]... Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz... ==> NOTE: You are performing a sequence of gsutil operations that may run significantly faster if you instead use gsutil -m cp ... Please see the -m section under "gsutil help options" for further information about when gsutil -m can be advantageous. Operation completed over 138 objects/240.6 GiB.
# IPython shell escape: recursively copy (-r), with gsutil's -m option, the
# RNPv3 prefix of gs://transfer-amlproject into gs://amlproject/RNA/.
# NOTE(review): the captured output that follows lists objects under RNPv2/,
# not RNPv3/ -- the command text and the recorded run disagree; confirm which
# prefix is intended before re-running.
! gsutil -m cp -r gs://transfer-amlproject/RNPv3 gs://amlproject/RNA/
Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]... 
Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]... Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]... \ [138/138 files][240.6 GiB/240.6 GiB] 100% Done Operation completed over 138 objects/240.6 GiB.
# List the top-level contents of the amlproject bucket.
! gsutil ls gs://amlproject/
gs://amlproject/MV-4-11.bai gs://amlproject/MV-4-11.bam gs://amlproject/Chip/ gs://amlproject/RNA/ gs://amlproject/RNPv2/
# Name of the Terra sample set used by every later cell of this section.
sampleset = 'RNPv3'
# Register the R1/R2 fastq pairs found under RNPv2/ in the amlproject bucket
# in the hg38_RNAseq workspace; sample ids are the part of each file name
# that precedes '_MP7624'.
terra.uploadFromFolder(
    'amlproject',
    'RNPv2/',
    'broad-firecloud-ccle/hg38_RNAseq',
    samplesetname=sampleset,
    fformat="fastqR1R2",
    sep='_MP7624',
)
please be sure you gave access to your terra email account access to this bucket
['RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz', 'RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz', 'RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz', 
'RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz', 
'RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz', 'RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz', 'RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz', 
'RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz']
> /home/jeremie/JKBio/TerraFunction.py(227)uploadFromFolder()
226 ipdb.set_trace()
--> 227 df = pd.DataFrame(data)
228 print(df)
ipdb> c
sample_id fastq1 \
0 20200304_10 gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...
1 20200304_11 gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...
2 20200304_12 gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...
3 20200304_13 gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...
4 20200304_14 gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...
.. ... ...
64 20200304_69 gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...
65 20200304_6 gs://amlproject/RNPv2/20200304_6_MP7624_S6_R1_...
66 20200304_7 gs://amlproject/RNPv2/20200304_7_MP7624_S7_R1_...
67 20200304_8 gs://amlproject/RNPv2/20200304_8_MP7624_S8_R1_...
68 20200304_9 gs://amlproject/RNPv2/20200304_9_MP7624_S9_R1_...
fastq2
0 gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...
1 gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...
2 gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...
3 gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...
4 gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...
.. ...
64 gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...
65 gs://amlproject/RNPv2/20200304_6_MP7624_S6_R2_...
66 gs://amlproject/RNPv2/20200304_7_MP7624_S7_R2_...
67 gs://amlproject/RNPv2/20200304_8_MP7624_S8_R2_...
68 gs://amlproject/RNPv2/20200304_9_MP7624_S9_R2_...
[69 rows x 3 columns]
Successfully imported 69 participants.
Successfully imported 69 samples.
Successfully imported 1 sample sets:
* MAX_AML_RNPv2 (69 samples)
# Launch the STAR configuration on every sample of the set
# (expression 'this.samples'), then hand the submission id to
# terra.waitForSubmission.
wm = dm.WorkspaceManager('broad-firecloud-ccle/hg38_RNAseq')
submission_id = wm.create_submission(
    "star_v1-0_BETA_cfg",
    sampleset,
    'sample_set',
    expression='this.samples',
)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 2ad41571-b46e-4c3b-be51-44e800717d2a.
# Launch the RSEM configuration on every sample of the set, then hand the
# submission id to terra.waitForSubmission.
submission_id = wm.create_submission(
    "rsem_v1-0_BETA_cfg",
    sampleset,
    'sample_set',
    expression='this.samples',
)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission cfd65243-2093-4007-9b21-c5b09c9fc875. 1tatus is: Failed for 0 jobs in submission 0. 2 mn elapsed. 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 27 28 29 3 30 31 32 33 34 35 36 37 38 39 4 40 41 42 43 44 45 46 47 48 49 5 50 51 52 53 54 55 56 57 58 59 6 60 61 62 63 64 65 66 67 68 69 7 70 71 72 73 8 9 0.0 of jobs Succeeded in submission 0.
----------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-4-50c8187cd693> in <module> 1 submission_id = wm.create_submission("rsem_v1-0_BETA_cfg", 2 sampleset,'sample_set',expression='this.samples') ----> 3 terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id) ~/JKBio/TerraFunction.py in waitForSubmission(workspace, submissions, raise_errors) 93 print(str(done / (done + failed)) + " of jobs Succeeded in submission " + str(scount) + ".") 94 if len(failed_submission) > 0 and raise_errors: ---> 95 raise RuntimeError(str(len(failed_submission)) + " failed submission") 96 return failed_submission 97 # print and return well formated data RuntimeError: 73 failed submission
# Launch the RSEM aggregation configuration on the sample set itself (no
# entity type / expression given), then hand the submission id to
# terra.waitForSubmission.
submission_id = wm.create_submission(
    "rsem_aggregate_results_v1-0_BETA_cfg",
    sampleset,
)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 9be600dc-4db0-4af1-b607-503800cc45fc. 1.0 of jobs Succeeded in submission 0.sion 0. 210 mn elapsed.
[]
# Attributes of our sample set as stored in the workspace; keep the bucket
# path of the gene-level expected-count matrix for the download below.
sample_sets = wm.get_sample_sets()
results = sample_sets.loc[sampleset]
rsem_genes_expected_count = results['rsem_genes_expected_count']
results
samples [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1, 20... rsem_transcripts_isopct gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_transcripts_tpm gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_transcripts_expected_count gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_genes_tpm gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... rsem_genes_expected_count gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba... Name: RNPv3, dtype: object
mkdir ../../data/RNPv3
! gsutil cp $rsem_genes_expected_count ../../data/RNPv3/
Copying gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcbabe2da/9be600dc-4db0-4af1-b607-503800cc45fc/rsem_aggregate_results_workflow/abca308c-59a2-4ad5-8c87-9e4bdf407411/call-rsem_aggregate_results/RNPv3.rsem_genes_expected_count.txt.gz... / [1 files][ 4.6 MiB/ 4.6 MiB] Operation completed over 1 objects/4.6 MiB.
# Local path of the downloaded matrix: the file name of the bucket object,
# placed inside ../../data/RNPv3/.
file = os.path.join('../../data/RNPv3/',
                    os.path.basename(rsem_genes_expected_count))
file
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
# Decompress the downloaded matrix in place.
! gunzip $file
# `file` itself is not updated and still carries the '.gz' suffix; the reader
# below strips it with file[:-3].
file
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
# Load the decompressed count matrix once (file[:-3] removes the '.gz'
# suffix). The second, hard-coded read of the very same file was redundant.
rsem_genes_expected_count = pd.read_csv(file[:-3], sep='\t')
# Drop the transcript list column by keyword: the positional `axis` argument
# (`drop(label, 1)`) is rejected by pandas >= 2.0.
data = rsem_genes_expected_count.drop(columns="transcript_id(s)")
# Replace the gene ids with the first element returned by h.convertGenes
# (ids it cannot convert are reported on stdout).
data["gene_id"] = h.convertGenes(data['gene_id'])[0]
you need access to taiga for this (https://pypi.org/project/taigapy/) 20702 could not be parsed... we don't have all genes already
# Index the count matrix by gene id, leaving one column per sample.
data=data.set_index('gene_id')
data
| 1 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | ... | 67 | 68 | 69 | 7 | 70 | 71 | 72 | 73 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| TNMD | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 1926.0 | 1846.00 | 1915.00 | 2633.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 430.78 | 460.04 | 437.36 | 542.42 | 572.5 | 507.48 | 580.49 | 713.56 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 949.22 | 1277.00 | 1032.60 | 1163.60 | 783.5 | 1088.50 | 1184.50 | 1572.40 | 1481.00 | 1332.90 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ERCC-00164 | 3.00 | 5.00 | 8.00 | 2.00 | 2.00 | 1.00 | 2.00 | 1.00 | 3.00 | 3.00 | ... | 1.00 | 1.00 | 5.00 | 1.00 | 6.0 | 3.00 | 3.00 | 4.00 | 2.00 | 4.00 |
| ERCC-00165 | 215.00 | 594.00 | 424.00 | 509.00 | 136.00 | 88.00 | 165.00 | 258.00 | 161.00 | 163.00 | ... | 93.00 | 139.00 | 87.00 | 127.00 | 628.0 | 207.00 | 151.00 | 241.00 | 187.00 | 176.00 |
| ERCC-00168 | 3.00 | 12.00 | 9.00 | 8.00 | 0.00 | 8.00 | 0.00 | 5.00 | 5.00 | 1.00 | ... | 3.00 | 4.00 | 1.00 | 3.00 | 8.0 | 5.00 | 4.00 | 7.00 | 8.00 | 3.00 |
| ERCC-00170 | 66.00 | 205.00 | 133.00 | 211.00 | 57.00 | 40.00 | 73.00 | 94.00 | 42.00 | 40.00 | ... | 41.00 | 56.00 | 33.00 | 50.00 | 141.0 | 72.00 | 92.00 | 110.00 | 89.00 | 88.00 |
| ERCC-00171 | 13554.00 | 40900.00 | 29090.00 | 33242.00 | 10039.00 | 6399.00 | 10836.00 | 15684.00 | 9526.00 | 8893.00 | ... | 7058.00 | 7576.00 | 5882.00 | 8381.00 | 47913.0 | 12046.00 | 10447.00 | 17316.00 | 10492.00 | 12389.00 |
58813 rows × 73 columns
# Mapping from sequencing sample number (as a string) to the full sample name
# "mr<N>-MV411-RNP_<target>-r<replicate>".
# Samples are numbered consecutively target by target and mr<N> is always the
# sample number + 119. Each entry below is
# (target, first replicate number, number of replicates).
_rnp_layout = [
    ("IRF2BP2", 4, 3), ("IRF8", 4, 3), ("MEF2D", 4, 3), ("MYC", 4, 3),
    ("RUNX1", 4, 3), ("RUNX2", 4, 3), ("SPI1", 4, 3), ("ZMYND8", 4, 3),
    ("LMO2", 4, 3), ("LYL1", 4, 3), ("MAX", 4, 3), ("ZEB2", 4, 3),
    ("MEF2C", 4, 3), ("MEIS1", 4, 3), ("FLI1", 4, 3), ("ELF2", 4, 3),
    ("GFI1", 4, 3), ("IKZF1", 4, 3), ("CEBPA", 4, 3), ("MYB", 4, 3),
    ("MYBL2", 1, 3), ("HOXA9", 4, 3), ("AAVS1", 1, 3), ("SP1", 4, 4),
]
rename = {}
for _target, _first_rep, _n_reps in _rnp_layout:
    for _rep in range(_first_rep, _first_rep + _n_reps):
        _sample = len(rename) + 1
        rename[str(_sample)] = "mr{}-MV411-RNP_{}-r{}".format(
            _sample + 119, _target, _rep)
# Show the current column labels (sample numbers as strings), i.e. the keys
# that will be looked up in `rename`.
data.columns
Index(['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2',
'20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30',
'31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41',
'42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52',
'53', '54', '55', '56', '57', '58', '59', '6', '60', '61', '62', '63',
'64', '65', '66', '67', '68', '69', '7', '70', '71', '72', '73', '8',
'9'],
dtype='object')
# Swap every sample-number column label for its full sample name; an unknown
# label raises KeyError instead of being silently kept.
data.columns = list(map(rename.__getitem__, data.columns))
data
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr189-MV411-RNP_SP1-r4 | mr190-MV411-RNP_SP1-r5 | mr191-MV411-RNP_SP1-r6 | mr192-MV411-RNP_SP1-r7 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| TNMD | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 1926.0 | 1846.00 | 1915.00 | 2633.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 430.78 | 460.04 | 437.36 | 542.42 | 572.5 | 507.48 | 580.49 | 713.56 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 949.22 | 1277.00 | 1032.60 | 1163.60 | 783.5 | 1088.50 | 1184.50 | 1572.40 | 1481.00 | 1332.90 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ERCC-00164 | 3.00 | 5.00 | 8.00 | 2.00 | 2.00 | 1.00 | 2.00 | 1.00 | 3.00 | 3.00 | ... | 1.00 | 1.00 | 5.00 | 1.00 | 6.0 | 3.00 | 3.00 | 4.00 | 2.00 | 4.00 |
| ERCC-00165 | 215.00 | 594.00 | 424.00 | 509.00 | 136.00 | 88.00 | 165.00 | 258.00 | 161.00 | 163.00 | ... | 93.00 | 139.00 | 87.00 | 127.00 | 628.0 | 207.00 | 151.00 | 241.00 | 187.00 | 176.00 |
| ERCC-00168 | 3.00 | 12.00 | 9.00 | 8.00 | 0.00 | 8.00 | 0.00 | 5.00 | 5.00 | 1.00 | ... | 3.00 | 4.00 | 1.00 | 3.00 | 8.0 | 5.00 | 4.00 | 7.00 | 8.00 | 3.00 |
| ERCC-00170 | 66.00 | 205.00 | 133.00 | 211.00 | 57.00 | 40.00 | 73.00 | 94.00 | 42.00 | 40.00 | ... | 41.00 | 56.00 | 33.00 | 50.00 | 141.0 | 72.00 | 92.00 | 110.00 | 89.00 | 88.00 |
| ERCC-00171 | 13554.00 | 40900.00 | 29090.00 | 33242.00 | 10039.00 | 6399.00 | 10836.00 | 15684.00 | 9526.00 | 8893.00 | ... | 7058.00 | 7576.00 | 5882.00 | 8381.00 | 47913.0 | 12046.00 | 10447.00 | 17316.00 | 10492.00 | 12389.00 |
58813 rows × 73 columns
Filter some more: remove the genes whose counts have zero variance across samples
# Row positions of the genes whose counts are identical in every sample
# (variance taken along the sample axis).
gene_variance = data.values.var(axis=1)
toremove = np.argwhere(gene_variance == 0)
toremove.ravel()
array([ 1, 15, 24, ..., 58714, 58715, 58718])
# Number of zero-variance genes found (np.argwhere returns an (n, 1) array).
toremove.shape
(19991, 1)
# Remove the zero-variance genes by *position*. The index holds gene names,
# which are not unique, so dropping by label (`data.drop(<labels>)`) also
# removed every other row sharing a name with a zero-variance row. In the
# recorded run that cost 35 extra genes (58813 - 19991 = 38822 rows expected,
# 38787 obtained).
keep = np.ones(len(data), dtype=bool)
keep[toremove.ravel()] = False
data = data[keep]
data.shape
(38787, 73)
# Split the rows on whether the gene id still contains 'ENSG00'.
# `ensg` gets the rows that do; `ERCC` and `data` each get a separate table
# of all the others.
is_ensg = data.index.str.contains('ENSG00')
ensg = data[is_ensg]
ERCC = data[~is_ensg]
data = data[~is_ensg]
Renormalize the data using the ERCC spike-ins
# Number of rows whose id does not contain 'ENSG00'.
len(ERCC)
26672
# Despite its name, ERCC holds every row whose id does not contain 'ENSG00':
# named genes as well as the ERCC-* spike-ins.
ERCC
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr189-MV411-RNP_SP1-r4 | mr190-MV411-RNP_SP1-r5 | mr191-MV411-RNP_SP1-r6 | mr192-MV411-RNP_SP1-r7 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 1926.0 | 1846.00 | 1915.00 | 2633.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 430.78 | 460.04 | 437.36 | 542.42 | 572.5 | 507.48 | 580.49 | 713.56 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 949.22 | 1277.00 | 1032.60 | 1163.60 | 783.5 | 1088.50 | 1184.50 | 1572.40 | 1481.00 | 1332.90 |
| FGR | 1443.00 | 8556.00 | 6387.00 | 5955.00 | 2359.00 | 2615.00 | 2258.00 | 3340.00 | 3229.00 | 3466.00 | ... | 2323.00 | 2401.00 | 2230.00 | 3680.00 | 2016.0 | 2285.00 | 2384.00 | 3106.00 | 4706.00 | 4308.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ERCC-00164 | 3.00 | 5.00 | 8.00 | 2.00 | 2.00 | 1.00 | 2.00 | 1.00 | 3.00 | 3.00 | ... | 1.00 | 1.00 | 5.00 | 1.00 | 6.0 | 3.00 | 3.00 | 4.00 | 2.00 | 4.00 |
| ERCC-00165 | 215.00 | 594.00 | 424.00 | 509.00 | 136.00 | 88.00 | 165.00 | 258.00 | 161.00 | 163.00 | ... | 93.00 | 139.00 | 87.00 | 127.00 | 628.0 | 207.00 | 151.00 | 241.00 | 187.00 | 176.00 |
| ERCC-00168 | 3.00 | 12.00 | 9.00 | 8.00 | 0.00 | 8.00 | 0.00 | 5.00 | 5.00 | 1.00 | ... | 3.00 | 4.00 | 1.00 | 3.00 | 8.0 | 5.00 | 4.00 | 7.00 | 8.00 | 3.00 |
| ERCC-00170 | 66.00 | 205.00 | 133.00 | 211.00 | 57.00 | 40.00 | 73.00 | 94.00 | 42.00 | 40.00 | ... | 41.00 | 56.00 | 33.00 | 50.00 | 141.0 | 72.00 | 92.00 | 110.00 | 89.00 | 88.00 |
| ERCC-00171 | 13554.00 | 40900.00 | 29090.00 | 33242.00 | 10039.00 | 6399.00 | 10836.00 | 15684.00 | 9526.00 | 8893.00 | ... | 7058.00 | 7576.00 | 5882.00 | 8381.00 | 47913.0 | 12046.00 | 10447.00 | 17316.00 | 10492.00 | 12389.00 |
26672 rows × 73 columns
# Gene list stored in the first (and header-less) column of CTF.csv, as a
# plain Python list.
ctf = (
    pd.read_csv('../data/CTF.csv', header=None)[0]
    .values
    .tolist()
)
ctf
['ARID2', 'CEBPA', 'CEBPE', 'E2F3', 'FLI1', 'FOSL2', 'GFI1', 'GFI1B', 'HHEX', 'IRF8', 'LYL1', 'MEF2C', 'MEF2D', 'MEIS1', 'MTF1', 'MYB', 'MYC', 'PLAGL2', 'RUNX1', 'RUNX2', 'RXRA', 'SETDB1', 'SNAPC5', 'SP1', 'SPI1', 'SREBF1', 'STAT5B', 'TERF2', 'TFAP4', 'ZEB2', 'ZFPM1', 'ZMYND8', 'LMO2', 'MAX', 'ELF2', 'ETV6', 'HOXA9', 'GATA2']
%%R
# Load erccdashboard into the embedded R session; its functions (initDat,
# est_r_m, dynRangePlot) are called through %R further down.
library('erccdashboard')
R[write to console]: Loading required package: ggplot2
R[write to console]: Loading required package: gridExtra
R[write to console]:
Attaching package: ‘gridExtra’
R[write to console]: The following object is masked from ‘package:Biobase’:
combine
R[write to console]: The following object is masked from ‘package:BiocGenerics’:
combine
# Truncate the expected counts to integers and expose the gene names as a
# 'Feature' column (it becomes the first column of the table written for R
# further down).
ERCC = ERCC.astype(int)
ERCC['Feature'] = ERCC.index
# Heatmap of the spike-in counts for the three AAVS1 samples and three other
# samples: each column is divided by its mean over the ERCC-* rows, then
# log2(ratio + 1) is taken.
heatmap_samples = [
    'mr186-MV411-RNP_AAVS1-r1',
    'mr187-MV411-RNP_AAVS1-r2',
    'mr188-MV411-RNP_AAVS1-r3',
    'mr129-MV411-RNP_MYC-r4',
    'mr189-MV411-RNP_SP1-r4',
    'mr120-MV411-RNP_IRF2BP2-r4',
]
spike_counts = ERCC[ERCC.index.str.contains('ERCC-')][heatmap_samples].values
sns.heatmap(np.log2(spike_counts / spike_counts.mean(0) + 1))
<matplotlib.axes._subplots.AxesSubplot at 0x7f702dfab190>
# Distinct "RNP_<target>" tags found in the sample names (third '-'-separated
# field). The last column ('Feature') is skipped, and the AAVS1 samples are
# taken out of the list.
experiments = list({name.split('-')[2] for name in ERCC.columns[:-1]})
experiments.remove("RNP_AAVS1")
#TODO: compute the mass from concentration
# NOTE(review): totalRNAmass is set to a fixed 0.5 in the per-experiment loop;
# the TODO above presumably refers to deriving it from measured concentrations
# instead -- confirm.
###################################################
### code chunk number 3: defineInputData
###################################################
# Each %R line below assigns one variable in the embedded R session; the
# initDat() call in the per-experiment loop passes them on by name. The text
# after '#' on a %R line is sent to R together with the assignment, where it
# is an ordinary R comment.
# sample2Name ("AAAVS1") matches the 'AAAVS1_<n>' labels given to the three
# AAVS1 replicate columns in that loop.
%R datType = "count" # "count" for RNA-Seq data, "array" for microarray data
%R isNorm = F # flag to indicate if input expression measures are already normalized, default is FALSE
%R filenameRoot = "RNPv2" # user defined filename prefix for results files
%R sample2Name = "AAAVS1" # name for sample 2 in the experiment
%R erccmix = "Single" # name of ERCC mixture design, "RatioPair" is default
%R erccdilution = 1/100 # dilution factor used for Ambion spike-in mixtures
%R spikeVol = 1 # volume (in microliters) of diluted spike-in mixture added to total RNA mass
%R choseFDR = 0.1 # user defined false discovery rate (FDR), default is 0.05
array([0.1])
cols = list(ERCC.columns)
cols.sort()
res={}
for val in experiments:
d = {}
e=0
d.update({
'Feature':'Feature'
})
for i in cols[:-1]:
if val+'-' in i:
e+=1
d.update({i: val.split('_')[-1]+'_'+str(e)})
d.update({
'mr186-MV411-RNP_AAVS1-r1': 'AAAVS1_1',
'mr187-MV411-RNP_AAVS1-r2': 'AAAVS1_2',
'mr188-MV411-RNP_AAVS1-r3': 'AAAVS1_3'
})
a = ERCC[list(d.keys())].rename(columns=d)
a.to_csv('../data/ERCC_estimation.csv', index=None)
val = val.split('_')[-1]
torm = 'RNPv2.'+val+'.AAAVS1.All.Pvals.csv'
! rm $torm
%R -i val print(val)
%R print(sample2Name)
%R a <- read.csv('../data/ERCC_estimation.csv')
%R print(head(a))
%R exDat = ''
%R totalRNAmass <- 0.5
try:
%R -i val exDat = initDat(datType = datType, isNorm = isNorm, exTable = a, filenameRoot = filenameRoot, sample1Name = val, sample2Name = sample2Name, erccmix = erccmix, erccdilution = erccdilution, spikeVol = spikeVol, totalRNAmass = totalRNAmass, choseFDR = choseFDR)
%R exDat = est_r_m(exDat)
%R exDat = dynRangePlot(exDat)
except Warning:
print("failed for "+val)
continue
except:
print('worked for '+val)
%R print(summary(exDat))
%R grid.arrange(exDat$Figures$dynRangePlot)
%R grid.arrange(exDat$Figures$r_mPlot)
%R grid.arrange(exDat$Figures$rangeResidPlot)
%R -o rm rm <- exDat$Results$r_m.res$r_m.mn
%R -o se se <- exDat$Results$r_m.res$r_m.mnse
res[val] = (rm[0],se[0])
rm: cannot remove 'RNPv2.RUNX1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "RUNX1"
[1] "AAAVS1"
Feature RUNX1_1 RUNX1_2 RUNX1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1863 2093 2027 1620 1840 1729
3 SCYL3 577 617 601 430 460 437
4 C1orf112 1232 1209 1309 949 1277 1032
5 FGR 2359 2615 2258 2323 2401 2230
6 CFH 8 9 7 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.RUNX1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 17128 transcripts remain for analysis.
A total of 22 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2247.25 2328 2294.25 1629 1895 1703
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
70
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.01362734
GLM log(r_m) estimate weighted s.e.:
0.1356891
Number of ERCCs in Mix 1 dyn range: 70
Number of ERCCs in Mix 2 dyn range: 70
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00031 ERCC-00097 ERCC-00120 ERCC-00168 ERCC-00073
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.MYC.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MYC"
[1] "AAAVS1"
Feature MYC_1 MYC_2 MYC_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 2465 1701 1535 1620 1840 1729
3 SCYL3 846 672 603 430 460 437
4 C1orf112 1031 755 676 949 1277 1032
5 FGR 8556 6387 5955 2323 2401 2230
6 CFH 5 1 2 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.MYC.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 17015 transcripts remain for analysis.
A total of 11 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057
ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00098 ERCC-00117
ERCC-00142
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2374 1836.5 1790.5 1643 1913.5 1714
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
81
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
-1.430577
GLM log(r_m) estimate weighted s.e.:
0.1054966
Number of ERCCs in Mix 1 dyn range: 81
Number of ERCCs in Mix 2 dyn range: 81
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00041 ERCC-00017 ERCC-00073 ERCC-00081 ERCC-00086
ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137
ERCC-00138 ERCC-00156
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.RUNX2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "RUNX2"
[1] "AAAVS1"
Feature RUNX2_1 RUNX2_2 RUNX2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 2202 2148 2235 1620 1840 1729
3 SCYL3 545 575 536 430 460 437
4 C1orf112 1370 1245 1257 949 1277 1032
5 FGR 3340 3229 3466 2323 2401 2230
6 CFH 16 12 14 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.RUNX2.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 17042 transcripts remain for analysis.
A total of 20 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117
ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2369.75 2268 2240.75 1638.75 1908.5 1710.75
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
72
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
-0.2177111
GLM log(r_m) estimate weighted s.e.:
0.1235403
Number of ERCCs in Mix 1 dyn range: 72
Number of ERCCs in Mix 2 dyn range: 72
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00073 ERCC-00097 ERCC-00134 ERCC-00104
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.LYL1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "LYL1"
[1] "AAAVS1"
Feature LYL1_1 LYL1_2 LYL1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1954 1656 2061 1620 1840 1729
3 SCYL3 572 428 588 430 460 437
4 C1orf112 1241 952 1107 949 1277 1032
5 FGR 2786 2397 3052 2323 2401 2230
6 CFH 7 14 13 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.LYL1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16829 transcripts remain for analysis.
A total of 20 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2302 1853 2252 1669 1951 1743
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
72
Outlier ERCCs for GLM r_m Estimate:
ERCC-00144
GLM log(r_m) estimate:
0.1154768
GLM log(r_m) estimate weighted s.e.:
0.09762555
Number of ERCCs in Mix 1 dyn range: 72
Number of ERCCs in Mix 2 dyn range: 72
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00097 ERCC-00134 ERCC-00168 ERCC-00073 ERCC-00123
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.IKZF1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "IKZF1"
[1] "AAAVS1"
Feature IKZF1_1 IKZF1_2 IKZF1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1299 1529 2015 1620 1840 1729
3 SCYL3 361 406 571 430 460 437
4 C1orf112 836 967 1213 949 1277 1032
5 FGR 2082 1867 3154 2323 2401 2230
6 CFH 4 6 5 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.IKZF1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16752 transcripts remain for analysis.
A total of 22 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075
ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
1576.25 1526 2348.25 1677 1966.25 1753
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
70
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.2648816
GLM log(r_m) estimate weighted s.e.:
0.1193648
Number of ERCCs in Mix 1 dyn range: 70
Number of ERCCs in Mix 2 dyn range: 70
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00031 ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00134
ERCC-00158 ERCC-00164 ERCC-00168
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.MEIS1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MEIS1"
[1] "AAAVS1"
Feature MEIS1_1 MEIS1_2 MEIS1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1916 2046 2726 1620 1840 1729
3 SCYL3 477 554 683 430 460 437
4 C1orf112 1121 1128 1408 949 1277 1032
5 FGR 1935 2193 2556 2323 2401 2230
6 CFH 7 3 12 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.MEIS1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16907 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117
ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2115 2194 2639.5 1658 1938 1730
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.2082356
GLM log(r_m) estimate weighted s.e.:
0.1646045
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00097 ERCC-00164 ERCC-00168 ERCC-00073 ERCC-00109
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.FLI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "FLI1"
[1] "AAAVS1"
Feature FLI1_1 FLI1_2 FLI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1892 2087 2588 1620 1840 1729
3 SCYL3 450 555 668 430 460 437
4 C1orf112 1196 1338 1591 949 1277 1032
5 FGR 2480 2602 3360 2323 2401 2230
6 CFH 3 3 4 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.FLI1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16821 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2055 2218 2616 1669 1953 1743
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
ERCC-00039 ERCC-00019
GLM log(r_m) estimate:
0.2669788
GLM log(r_m) estimate weighted s.e.:
0.08613995
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00013 ERCC-00097 ERCC-00120 ERCC-00134 ERCC-00164
ERCC-00073
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.ELF2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "ELF2"
[1] "AAAVS1"
Feature ELF2_1 ELF2_2 ELF2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 2516 1913 1971 1620 1840 1729
3 SCYL3 640 486 584 430 460 437
4 C1orf112 1315 1056 1278 949 1277 1032
5 FGR 3206 2242 2711 2323 2401 2230
6 CFH 4 8 5 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.ELF2.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16904 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2414.75 1863 2194 1658.5 1938.5 1731
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.1883588
GLM log(r_m) estimate weighted s.e.:
0.1001319
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00040 ERCC-00073 ERCC-00120 ERCC-00123 ERCC-00164
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.MYBL2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MYBL2"
[1] "AAAVS1"
Feature MYBL2_1 MYBL2_2 MYBL2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1881 3921 1347 1620 1840 1729
3 SCYL3 469 1039 389 430 460 437
4 C1orf112 1108 2192 863 949 1277 1032
5 FGR 2573 5804 2117 2323 2401 2230
6 CFH 18 18 8 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.MYBL2.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 17053 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
1865 3829 1543 1638 1906 1710
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
ERCC-00039
GLM log(r_m) estimate:
0.4145379
GLM log(r_m) estimate weighted s.e.:
0.109987
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00013 ERCC-00031 ERCC-00073 ERCC-00077 ERCC-00097
ERCC-00120 ERCC-00134 ERCC-00147 ERCC-00158 ERCC-00168
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.IRF2BP2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "IRF2BP2"
[1] "AAAVS1"
Feature IRF2BP2_1 IRF2BP2_2 IRF2BP2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1619 1938 2043 1620 1840 1729
3 SCYL3 464 545 564 430 460 437
4 C1orf112 780 776 908 949 1277 1032
5 FGR 1443 1587 1765 2323 2401 2230
6 CFH 3 5 15 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.IRF2BP2.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16582 transcripts remain for analysis.
A total of 13 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057
ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098
ERCC-00117 ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
1614.75 1750.75 2094 1704 1995 1776
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
79
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
-1.242144
GLM log(r_m) estimate weighted s.e.:
0.2116787
Number of ERCCs in Mix 1 dyn range: 79
Number of ERCCs in Mix 2 dyn range: 79
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00041 ERCC-00138 ERCC-00017 ERCC-00073 ERCC-00081
ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.ZEB2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "ZEB2"
[1] "AAAVS1"
Feature ZEB2_1 ZEB2_2 ZEB2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 2361 2261 1810 1620 1840 1729
3 SCYL3 531 527 481 430 460 437
4 C1orf112 1086 1059 945 949 1277 1032
5 FGR 2523 2566 2552 2323 2401 2230
6 CFH 1 1 0 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.ZEB2.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16877 transcripts remain for analysis.
A total of 19 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00138 ERCC-00142
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2103 2164 2008 1663 1944 1734
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
73
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
-0.1701759
GLM log(r_m) estimate weighted s.e.:
0.1445402
Number of ERCCs in Mix 1 dyn range: 73
Number of ERCCs in Mix 2 dyn range: 73
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00031 ERCC-00041 ERCC-00097 ERCC-00120 ERCC-00156
ERCC-00158 ERCC-00164 ERCC-00073 ERCC-00134 ERCC-00137
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.MYB.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MYB"
[1] "AAAVS1"
Feature MYB_1 MYB_2 MYB_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1695 1557 1288 1620 1840 1729
3 SCYL3 582 482 460 430 460 437
4 C1orf112 831 825 776 949 1277 1032
5 FGR 3674 3220 2807 2323 2401 2230
6 CFH 10 17 11 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.MYB.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16938 transcripts remain for analysis.
A total of 19 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117
ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
1929.75 1799 1536 1654.75 1933 1726.75
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
73
Outlier ERCCs for GLM r_m Estimate:
ERCC-00039 ERCC-00144
GLM log(r_m) estimate:
-0.5666497
GLM log(r_m) estimate weighted s.e.:
0.1645544
Number of ERCCs in Mix 1 dyn range: 73
Number of ERCCs in Mix 2 dyn range: 73
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00040 ERCC-00097 ERCC-00104 ERCC-00120 ERCC-00123
ERCC-00134 ERCC-00164 ERCC-00168 ERCC-00073
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.HOXA9.AAAVS1.All.Pvals.csv': No such file or directory
[1] "HOXA9"
[1] "AAAVS1"
Feature HOXA9_1 HOXA9_2 HOXA9_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1842 2075 2081 1620 1840 1729
3 SCYL3 516 575 602 430 460 437
4 C1orf112 1174 1241 1190 949 1277 1032
5 FGR 2239 2364 2372 2323 2401 2230
6 CFH 4 10 8 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.HOXA9.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16777 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2114 2247 2145 1675 1962 1750
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.4186265
GLM log(r_m) estimate weighted s.e.:
0.1449086
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00134
ERCC-00147 ERCC-00164
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.SP1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "SP1"
[1] "AAAVS1"
Feature SP1_1 SP1_2 SP1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1926 1846 1915 1620 1840 1729
3 SCYL3 572 507 580 430 460 437
4 C1orf112 783 1088 1184 949 1277 1032
5 FGR 2016 2285 2384 2323 2401 2230
6 CFH 15 13 15 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.SP1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16820 transcripts remain for analysis.
A total of 13 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00098
ERCC-00117 ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2120.5 1938.25 2252 1669.25 1953.25 1743.25
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
79
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
-0.907468
GLM log(r_m) estimate weighted s.e.:
0.2659066
Number of ERCCs in Mix 1 dyn range: 79
Number of ERCCs in Mix 2 dyn range: 79
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00041 ERCC-00081 ERCC-00097 ERCC-00104 ERCC-00120
ERCC-00134 ERCC-00138 ERCC-00073 ERCC-00086 ERCC-00109
ERCC-00123 ERCC-00137
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.CEBPA.AAAVS1.All.Pvals.csv': No such file or directory
[1] "CEBPA"
[1] "AAAVS1"
Feature CEBPA_1 CEBPA_2 CEBPA_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1418 547 1781 1620 1840 1729
3 SCYL3 459 177 589 430 460 437
4 C1orf112 908 426 1171 949 1277 1032
5 FGR 1659 648 1791 2323 2401 2230
6 CFH 7 1 10 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.CEBPA.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16595 transcripts remain for analysis.
A total of 22 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075
ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00109 ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
1763 743 2081.5 1704 1993.5 1775
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
70
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.04728101
GLM log(r_m) estimate weighted s.e.:
0.2244516
Number of ERCCs in Mix 1 dyn range: 70
Number of ERCCs in Mix 2 dyn range: 70
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00067 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00123
ERCC-00147 ERCC-00158 ERCC-00164 ERCC-00168
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.ZMYND8.AAAVS1.All.Pvals.csv': No such file or directory
[1] "ZMYND8"
[1] "AAAVS1"
Feature ZMYND8_1 ZMYND8_2 ZMYND8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 2140 1697 1859 1620 1840 1729
3 SCYL3 608 551 661 430 460 437
4 C1orf112 1311 1123 1319 949 1277 1032
5 FGR 4209 3864 4504 2323 2401 2230
6 CFH 8 6 7 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.ZMYND8.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 17092 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2342 2038.25 2372 1633 1900.25 1707
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.05464554
GLM log(r_m) estimate weighted s.e.:
0.1512365
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00040 ERCC-00120 ERCC-00134 ERCC-00168 ERCC-00073
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.MAX.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MAX"
[1] "AAAVS1"
Feature MAX_1 MAX_2 MAX_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1811 2032 2172 1620 1840 1729
3 SCYL3 571 656 742 430 460 437
4 C1orf112 1215 1387 1393 949 1277 1032
5 FGR 3640 4163 4084 2323 2401 2230
6 CFH 9 5 3 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.MAX.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16957 transcripts remain for analysis.
A total of 15 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00117 ERCC-00138 ERCC-00142
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2142 2502 2512 1651 1928 1725
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
77
Outlier ERCCs for GLM r_m Estimate:
ERCC-00039
GLM log(r_m) estimate:
-0.6875484
GLM log(r_m) estimate weighted s.e.:
0.1118295
Number of ERCCs in Mix 1 dyn range: 77
Number of ERCCs in Mix 2 dyn range: 77
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00123 ERCC-00134 ERCC-00168 ERCC-00041 ERCC-00073
ERCC-00104 ERCC-00109 ERCC-00137 ERCC-00156
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.MEF2C.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MEF2C"
[1] "AAAVS1"
Feature MEF2C_1 MEF2C_2 MEF2C_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1877 1951 1803 1620 1840 1729
3 SCYL3 459 498 519 430 460 437
4 C1orf112 1127 1049 1138 949 1277 1032
5 FGR 2652 3037 2824 2323 2401 2230
6 CFH 3 7 5 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.MEF2C.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16818 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
1959.75 2084 2098.75 1669.75 1953.75 1743.75
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.2163322
GLM log(r_m) estimate weighted s.e.:
0.1600957
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00013 ERCC-00097 ERCC-00123 ERCC-00164 ERCC-00168
ERCC-00073
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.SPI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "SPI1"
[1] "AAAVS1"
Feature SPI1_1 SPI1_2 SPI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 1 0 0 0
2 DPM1 2415 1729 2302 1620 1840 1729
3 SCYL3 798 648 744 430 460 437
4 C1orf112 1054 742 1104 949 1277 1032
5 FGR 2369 1766 2458 2323 2401 2230
6 CFH 44 22 58 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.SPI1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 17191 transcripts remain for analysis.
A total of 11 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061
ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00142
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2758.5 2102.5 2723 1622 1888 1696
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
81
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
-1.274165
GLM log(r_m) estimate weighted s.e.:
0.2595628
Number of ERCCs in Mix 1 dyn range: 81
Number of ERCCs in Mix 2 dyn range: 81
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00012 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00134
ERCC-00137 ERCC-00138 ERCC-00017 ERCC-00041 ERCC-00073
ERCC-00081 ERCC-00156
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.LMO2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "LMO2"
[1] "AAAVS1"
Feature LMO2_1 LMO2_2 LMO2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1907 2199 2141 1620 1840 1729
3 SCYL3 561 592 644 430 460 437
4 C1orf112 1229 1188 1285 949 1277 1032
5 FGR 2777 3265 2969 2323 2401 2230
6 CFH 13 8 10 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.LMO2.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16882 transcripts remain for analysis.
A total of 20 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117
ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2221.75 2325 2312.5 1662 1942.5 1733
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
72
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.2066036
GLM log(r_m) estimate weighted s.e.:
0.1053062
Number of ERCCs in Mix 1 dyn range: 72
Number of ERCCs in Mix 2 dyn range: 72
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00120 ERCC-00137 ERCC-00158 ERCC-00164 ERCC-00168
ERCC-00073 ERCC-00109
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.MEF2D.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MEF2D"
[1] "AAAVS1"
Feature MEF2D_1 MEF2D_2 MEF2D_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 1983 2451 2378 1620 1840 1729
3 SCYL3 542 670 576 430 460 437
4 C1orf112 1163 1481 1332 949 1277 1032
5 FGR 3680 4706 4308 2323 2401 2230
6 CFH 17 12 14 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.MEF2D.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 17024 transcripts remain for analysis.
A total of 17 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117 ERCC-00123
ERCC-00142 ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2150.25 2742.25 2546 1642 1913 1713
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
75
Outlier ERCCs for GLM r_m Estimate:
ERCC-00144
GLM log(r_m) estimate:
-0.08897608
GLM log(r_m) estimate weighted s.e.:
0.1281747
Number of ERCCs in Mix 1 dyn range: 75
Number of ERCCs in Mix 2 dyn range: 75
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00041 ERCC-00134 ERCC-00073 ERCC-00104 ERCC-00137
ERCC-00138
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.GFI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "GFI1"
[1] "AAAVS1"
Feature GFI1_1 GFI1_2 GFI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 3000 984 1798 1620 1840 1729
3 SCYL3 708 258 466 430 460 437
4 C1orf112 1813 586 1037 949 1277 1032
5 FGR 2396 788 1525 2323 2401 2230
6 CFH 42 18 35 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.GFI1.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16711 transcripts remain for analysis.
A total of 21 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142
ERCC-00156
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
3122 1018 1947 1690.5 1977 1757
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
71
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.0614345
GLM log(r_m) estimate weighted s.e.:
0.1106509
Number of ERCCs in Mix 1 dyn range: 71
Number of ERCCs in Mix 2 dyn range: 71
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00040 ERCC-00097 ERCC-00120 ERCC-00137 ERCC-00158
ERCC-00164 ERCC-00168 ERCC-00073
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
rm: cannot remove 'RNPv2.IRF8.AAAVS1.All.Pvals.csv': No such file or directory
[1] "IRF8"
[1] "AAAVS1"
Feature IRF8_1 IRF8_2 IRF8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1 TSPAN6 0 0 0 0 0 0
2 DPM1 2211 2243 2269 1620 1840 1729
3 SCYL3 611 621 622 430 460 437
4 C1orf112 1390 1268 1244 949 1277 1032
5 FGR 3652 3917 4442 2323 2401 2230
6 CFH 16 17 15 6 5 9
Initializing the exDat list structure...
choseFDR = 0.1
repNormFactor is NULL
Filename root is: RNPv2.IRF8.AAAVS1
Transcripts were removed with a mean count < 1 or more than 2
replicates with 0 counts.
Original data contained 26672 transcripts.
After filtering 16800 transcripts remain for analysis.
A total of 18 out of 92
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117
ERCC-00123 ERCC-00138 ERCC-00142
repNormFactor is NULL,
Using Default Upper Quartile Normalization Method - 75th percentile
normVec:
2385 2327 2453 1672.25 1957.5 1744.25
Check for sample mRNA fraction differences(r_m)...
Number of ERCC Controls Used in r_m estimate
74
Outlier ERCCs for GLM r_m Estimate:
None
GLM log(r_m) estimate:
0.08350448
GLM log(r_m) estimate weighted s.e.:
0.1106991
Number of ERCCs in Mix 1 dyn range: 74
Number of ERCCs in Mix 2 dyn range: 74
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
controls were obtained for both samples:
ERCC-00012 ERCC-00013 ERCC-00134 ERCC-00137 ERCC-00164
ERCC-00168 ERCC-00073 ERCC-00156
Saving dynRangePlot to exDat
Length Class Mode
sampleInfo 11 -none- list
plotInfo 9 -none- list
erccInfo 4 -none- list
Transcripts 7 data.frame list
designMat 3 data.frame list
sampleNames 2 -none- character
idCols 6 data.frame list
normERCCDat 7 data.frame list
normFactor 6 -none- numeric
mnLibeFactor 1 -none- numeric
spikeFraction 1 -none- numeric
idColsAdj 6 data.frame list
Results 4 -none- list
Figures 3 -none- list
# Print every entry of `res` whose first value (the log(r_m) estimate) is, in
# absolute terms, more than three times its second value (the weighted s.e.).
for i, v in res.items():
    if not abs(v[0]) > 3 * v[1]:
        continue
    print(i, v[0])
MYC -1.430576835252246 FLI1 0.2669788365781275 MYBL2 0.4145378723566837 IRF2BP2 -1.2421436514123199 MYB -0.5666496866194601 SP1 -0.9074679568707595 MAX -0.6875484167700773 SPI1 -1.2741653425093569
# Per-column mean over the rows whose index contains 'ERCC-', restricted to
# the columns whose name contains 'AAVS1'.
ERCC.loc[ERCC.index.str.contains('ERCC-'), [i for i in ERCC.columns if 'AAVS1' in i]].mean()
mr186-MV411-RNP_AAVS1-r1 2705.054348 mr187-MV411-RNP_AAVS1-r2 3576.510870 mr188-MV411-RNP_AAVS1-r3 2621.956522 dtype: float64
# Per-column mean over the rows whose index contains 'ERCC-', restricted to
# the columns whose name contains 'SPI1'.
ERCC.loc[ERCC.index.str.contains('ERCC-'), [i for i in ERCC.columns if 'SPI1' in i]].mean()
mr138-MV411-RNP_SPI1-r4 34945.043478 mr139-MV411-RNP_SPI1-r5 8218.032609 mr140-MV411-RNP_SPI1-r6 8112.847826 dtype: float64
# Bind the per-target (estimate, s.e.) results to the name `scaling` (same
# object as `res`, not a copy) and display them.
scaling = res
scaling
{'MYB': (-0.5666496866194601, 0.16455438308564643),
'MEF2C': (0.21633221486591706, 0.16009568270385865),
'LMO2': (0.2066036480588095, 0.10530622574043316),
'MEIS1': (0.20823559991440868, 0.16460447494728012),
'IKZF1': (0.26488156665796003, 0.11936483909099824),
'CEBPA': (0.04728101063315868, 0.22445160295741662),
'ELF2': (0.18835876643089494, 0.10013191844645487),
'MEF2D': (-0.08897607523943744, 0.12817467579731256),
'RUNX2': (-0.21771114300468575, 0.12354032980074721),
'IRF2BP2': (-1.2421436514123199, 0.2116786922337),
'MYBL2': (0.4145378723566837, 0.10998698893732116),
'MAX': (-0.6875484167700773, 0.11182951672314183),
'LYL1': (0.11547676609947306, 0.09762554626023551),
'RUNX1': (0.013627339651964025, 0.1356890688647267),
'FLI1': (0.2669788365781275, 0.08613995212995244),
'HOXA9': (0.41862648305962474, 0.14490862380188851),
'SPI1': (-1.2741653425093569, 0.2595628445427471),
'ZMYND8': (0.05464554271508272, 0.1512365231509835),
'MYC': (-1.430576835252246, 0.10549660323839703),
'GFI1': (0.061434499699685764, 0.11065088877815657),
'SP1': (-0.9074679568707595, 0.26590656079563213),
'ZEB2': (-0.1701758517854591, 0.1445402147201962),
'IRF8': (0.08350447764203282, 0.11069908626789565)}
# Write `scaling` to disk and immediately read it back through the JKBio helpers.
# NOTE(review): `dictToFileToFile` looks like a search-and-replace artifact of
# `dictToFile` -- confirm that a helper with this exact name exists in JKBio.Helper.
# NOTE(review): the file is presumably JSON (going by its extension), in which
# case the tuple values come back as lists -- verify.
h.dictToFileToFile(scaling,"../results/RNPv2/scaling.json")
scaling = h.fileToDict("../results/RNPv2/scaling.json")
%matplotlib inline
ig, ax = plt.subplots(figsize=(10,10))
sns.heatmap(data.corr(),
xticklabels=data.columns,
yticklabels=data.columns, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f1f323aef10>
# Average-linkage agglomerative clustering with cosine affinity, fitted on the
# rows of the correlation matrix of `data` and cut into 15 clusters.
model = AgglomerativeClustering(n_clusters=15, linkage="average",
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(data.corr())
# scikit-learn numbers merged (non-leaf) nodes from n_samples upwards, where
# n_samples is the number of rows that were fitted. The fitted matrix is
# data.corr(), which has one row per entry of `labels`, so the counter must
# start at len(labels) and not at data.shape[0] (the row count of `data`).
ii = itertools.count(len(labels))
# One record per merge: the id of the new node and the ids of its two children.
tree = [{'node_id': next(ii), 'left': x[0], 'right': x[1]} for x in model.children_]
# Positions that order the fitted rows by their cluster label.
sort = labels.argsort()
# Persist `data` as CSV, then reload it from that same file using the first
# CSV column as the index.
data.to_csv('../results/RNPv2/counts.csv')
data = pd.read_csv('../results/RNPv2/counts.csv',index_col=0)
%matplotlib inline
# Clustered heatmap of the pairwise correlation between the columns of `data`,
# written to a PDF next to the other RNPv2 results.
sns.clustermap(data.corr(), figsize=(20, 20))
plt.savefig('../results/RNPv2/cluster_corr_count.pdf')
# Column-wise totals of `data`, shown as a plain Python list.
data.sum().tolist()
[31194860.27000039, 34734170.910000145, 41947063.61999977, 46794854.38000023, 45959725.04999988, 48187669.949999854, 43703179.22999995, 54815404.069999784, 51453432.84000005, 45694014.92000012, 37739408.16000016, 35925369.88000013, 45939275.84999983, 46049236.90999998, 47474159.87999978, 48525076.05999996, 45690646.539999746, 45157321.31999988, 56639651.62999975, 41764180.25999997, 53047868.079999454, 45963304.22999989, 42284214.549999595, 47507365.27999984, 43762796.11999972, 45382911.53999989, 46972864.209999934, 45345593.949999996, 37246793.10999977, 44768420.24999964, 42046067.34999971, 50800605.66999957, 51176436.25999986, 42939652.28999985, 44136137.289999895, 40740731.69999998, 38508207.550000004, 41500257.68999979, 41227894.83000014, 43337577.789999865, 43352847.28999995, 51316363.68999997, 40072110.34000017, 43282705.06999982, 51083598.04999976, 47140394.049999766, 37620883.43999992, 44039610.83999986, 61484638.129999965, 20045963.380000293, 38556072.189999774, 31634429.490000147, 29835972.010000307, 47235734.2699997, 34097279.28000006, 14896010.669999905, 40029165.88999997, 38726353.51999988, 37015620.039999984, 31655845.910000257, 37291884.63999993, 77020486.5900007, 76035190.80000074, 80821407.53000104, 88932208.80000061, 96200436.35000083, 33570457.16000021, 39525165.01000017, 35056555.59000006]
# (number of rows, number of columns) of `data`.
data.shape
(26580, 73)
# Display `data` for visual inspection.
data
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr189-MV411-RNP_SP1-r4 | mr190-MV411-RNP_SP1-r5 | mr191-MV411-RNP_SP1-r6 | mr192-MV411-RNP_SP1-r7 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 1926.0 | 1846.00 | 1915.00 | 2633.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 430.78 | 460.04 | 437.36 | 542.42 | 572.5 | 507.48 | 580.49 | 713.56 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 949.22 | 1277.00 | 1032.60 | 1163.60 | 783.5 | 1088.50 | 1184.50 | 1572.40 | 1481.00 | 1332.90 |
| FGR | 1443.00 | 8556.00 | 6387.00 | 5955.00 | 2359.00 | 2615.00 | 2258.00 | 3340.00 | 3229.00 | 3466.00 | ... | 2323.00 | 2401.00 | 2230.00 | 3680.00 | 2016.0 | 2285.00 | 2384.00 | 3106.00 | 4706.00 | 4308.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| BMP8B-AS1 | 3.00 | 2.00 | 2.00 | 4.00 | 10.00 | 9.00 | 9.00 | 8.00 | 4.00 | 7.00 | ... | 6.00 | 5.00 | 4.00 | 3.00 | 3.0 | 6.00 | 7.00 | 10.00 | 3.00 | 7.00 |
| H2AL1SP | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| NIPBL-DT | 462.00 | 650.00 | 478.00 | 431.00 | 777.00 | 829.00 | 782.00 | 709.00 | 743.00 | 776.00 | ... | 497.00 | 653.00 | 673.00 | 889.00 | 673.0 | 628.00 | 871.00 | 962.00 | 1099.00 | 1024.00 |
| CERNA2 | 2.00 | 7.00 | 8.00 | 3.00 | 13.00 | 6.00 | 24.00 | 9.00 | 8.00 | 12.00 | ... | 4.00 | 10.00 | 10.00 | 3.00 | 0.0 | 18.00 | 28.00 | 28.00 | 1.00 | 7.06 |
| LINC02689 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
26580 rows × 73 columns
# Experiment label = third '-'-separated field of each sample column name
# (e.g. 'mr120-MV411-RNP_IRF2BP2-r4' -> 'RNP_IRF2BP2').
# The helper column 'gene_id' is excluded by name rather than by dropping the
# last column: when 'gene_id' has not been appended to `data` yet, a positional
# [:-1] would silently discard a real sample column instead.
# Sorting replaces the arbitrary set order with a reproducible one.
experiments = sorted({i.split('-')[2] for i in data.columns if i != 'gene_id'})
experiments
['RNP_LMO2', 'RNP_MAX', 'RNP_HOXA9', 'RNP_LYL1', 'RNP_RUNX2', 'RNP_ELF2', 'RNP_IKZF1', 'RNP_FLI1', 'RNP_MEF2C', 'RNP_MYBL2', 'RNP_CEBPA', 'RNP_IRF8', 'RNP_MEF2D', 'RNP_MEIS1', 'RNP_IRF2BP2', 'RNP_ZEB2', 'RNP_GFI1', 'RNP_SP1', 'RNP_AAVS1', 'RNP_ZMYND8', 'RNP_SPI1', 'RNP_MYC', 'RNP_RUNX1', 'RNP_MYB']
# Take the AAVS1 entry out of the list of experiments. list.remove raises
# ValueError when the entry is absent, e.g. if this cell is executed twice.
experiments.remove("RNP_AAVS1")
# Run one DESeq2 analysis per experiment and collect the result tables in
# `results`, keyed by experiment label.

# Copy the index into a trailing 'gene_id' column; it is handed to pyDESeq2
# through `gene_column` and is what `data.columns[:-1]` skips below.
data['gene_id'] = data.index
results = {}
for val in experiments:
    # One design row per sample column: 'DMSO' is 1 for columns whose name
    # contains 'RNP_AAVS1', 'Target' is 1 for columns whose name contains
    # `val` followed by '-'; every other sample gets 0 in both columns.
    design = pd.DataFrame(index=data.columns[:-1], columns=['DMSO','Target'],
                          data=np.array([[1 if 'RNP_AAVS1' in i else 0 for i in data.columns[:-1]],
                                         [1 if val+'-' in i else 0 for i in data.columns[:-1]]]).T)
    # presumably mirrors how the R side rewrites '-' in sample names -- TODO confirm
    design.index = design.index.astype(str).str.replace('-','.')
    deseq = pyDESeq2.pyDESeq2(count_matrix=data, design_matrix = design,
                              design_formula='~DMSO + Target', gene_column="gene_id")
    # `scaling` maps the bare target name to (estimate, s.e.). When the estimate
    # is more than 3 s.e. away from zero, size factors are requested explicitly,
    # passing the rows whose gene id contains "ERCC-" as control genes.
    if abs(scaling[val.split('_')[1]][0]) > 3*scaling[val.split('_')[1]][1]:
        print("estimating sizeFactors for this one")
        deseq.run_estimate_size_factors(controlGenes=data.gene_id.str.contains("ERCC-"))
    deseq.run_deseq()
    deseq.get_deseq_result()
    r = deseq.deseq_result
    # The second positional parameter of np.nan_to_num is `copy`, not the fill
    # value, so the replacement must be given through the `nan` keyword.
    # Without it, NaN p-values were turned into 0.0 (i.e. maximally
    # significant) instead of the intended 1.
    r.pvalue = np.nan_to_num(np.array(r.pvalue), nan=1.0)
    r.log2FoldChange = np.nan_to_num(np.array(r.log2FoldChange), nan=0.0)
    results[val] = r
3.3.2 estimating sizeFactors for this one
R[write to console]: using pre-existing size factors R[write to console]: estimating dispersions R[write to console]: gene-wise dispersion estimates R[write to console]: mean-dispersion relationship R[write to console]: final dispersion estimates R[write to console]: fitting model and testing R[write to console]: -- replacing outliers and refitting for 127 genes -- DESeq argument 'minReplicatesForReplace' = 7 -- original counts are preserved in counts(dds) R[write to console]: estimating dispersions R[write to console]: fitting model and testing
# One volcano plot per experiment, built by the JKBio helper with the genes in
# `ctf` passed as the ones to highlight.
for val in experiments:
    a = h.volcano(
        results[val],
        tohighlight=ctf,
        title=val,
        maxvalue= 60,
        searchbox=True,
        minlogfold=0.5,
    )
    try:
        show(a)
    except RuntimeError:
        # A RuntimeError on the first attempt is answered with one retry; a
        # second failure propagates.
        show(a)
# Write each experiment's result table to its own CSV file, named after the
# experiment label.
for k, val in results.items():
    val.to_csv('../results/RNPv2/deseq_' + k + ".csv")
results = {}
des = ! ls ../results/RNPv2/deseq_RNP_*.csv
for val in des:
results["RNP_"+val.split('RNP_')[1].split('.')[0]] = pd.read_csv(val,index_col=0)
results.keys()
dict_keys(['RNP_all', 'RNP_CEBPA', 'RNP_ELF2', 'RNP_FLI1', 'RNP_GFI1', 'RNP_HOXA9', 'RNP_IKZF1', 'RNP_IRF2BP2', 'RNP_IRF8', 'RNP_LMO2', 'RNP_LYL1', 'RNP_MAX', 'RNP_MEF2C', 'RNP_MEF2D', 'RNP_MEIS1', 'RNP_MYB', 'RNP_MYBL2', 'RNP_MYC', 'RNP_RUNX1', 'RNP_RUNX2', 'RNP_SP1', 'RNP_SPI1', 'RNP_ZEB2', 'RNP_ZMYND8'])
# The glob used to reload the per-experiment CSVs also matches the aggregate
# 'deseq_RNP_all.csv' file, which is not a per-experiment result. Drop it if
# present; the default avoids a KeyError when that file has not been written
# yet (it is only produced further down in the notebook).
results.pop('RNP_all', None)
| RNP_CEBPA_fc_log2 | RNP_CEBPA_padj | RNP_CEBPA_pval | RNP_ELF2_fc_log2 | RNP_ELF2_padj | RNP_ELF2_pval | RNP_FLI1_fc_log2 | RNP_FLI1_padj | RNP_FLI1_pval | RNP_GFI1_fc_log2 | ... | RNP_SP1_pval | RNP_SPI1_fc_log2 | RNP_SPI1_padj | RNP_SPI1_pval | RNP_ZEB2_fc_log2 | RNP_ZEB2_padj | RNP_ZEB2_pval | RNP_ZMYND8_fc_log2 | RNP_ZMYND8_padj | RNP_ZMYND8_pval | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TSPAN6 | 0.642130 | NaN | 0.930596 | -0.064479 | NaN | 0.993022 | 0.424549 | 0.999961 | 0.954080 | 0.207821 | ... | 0.875304 | -0.393437 | NaN | 9.573317e-01 | 0.022311 | NaN | 0.997585 | -0.167083 | NaN | 0.981920 |
| DPM1 | -0.146773 | 0.175869 | 0.055432 | 0.109824 | 0.986459 | 0.142868 | 0.487813 | 0.940560 | 0.273223 | 0.086520 | ... | 0.024196 | -1.578211 | 0.000952 | 1.818801e-04 | 0.208905 | 0.018236 | 0.003507 | -0.164134 | 0.99984 | 0.026264 |
| SCYL3 | 0.060378 | 0.701136 | 0.497529 | 0.008106 | 0.998610 | 0.924098 | 0.350550 | 0.940560 | 0.394458 | -0.105824 | ... | 0.018508 | -1.266968 | 0.003332 | 1.240800e-03 | -0.055466 | 0.695996 | 0.516608 | 0.005266 | 0.99984 | 0.950385 |
| C1orf112 | 0.126713 | 0.461042 | 0.242680 | 0.106333 | 0.986459 | 0.319820 | 0.597610 | 0.940560 | 0.211661 | 0.140322 | ... | 0.036461 | -1.948156 | 0.000159 | 1.429172e-05 | -0.052419 | 0.779765 | 0.626967 | 0.048705 | 0.99984 | 0.650304 |
| FGR | -0.582974 | 0.118835 | 0.031944 | -0.130497 | 0.986459 | 0.639216 | 0.340905 | 0.945846 | 0.447361 | -0.812484 | ... | 0.003588 | -2.030338 | 0.000016 | 7.057808e-07 | -0.127653 | 0.792324 | 0.646082 | 0.433732 | 0.99984 | 0.112956 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| ERCC-00164 | -0.922723 | 0.662125 | 0.447910 | -3.518530 | 0.986459 | 0.010779 | -0.155868 | 0.999961 | 0.878590 | -3.252890 | ... | 0.504492 | 0.345732 | 0.654121 | 6.166040e-01 | -1.553886 | 0.401426 | 0.217815 | -1.146495 | NaN | 0.305981 |
| ERCC-00165 | -0.503993 | 0.579337 | 0.354594 | -1.030428 | 0.986459 | 0.054943 | -0.159404 | 0.940560 | 0.209857 | -0.689089 | ... | 0.619204 | 0.005076 | 0.965538 | 9.608078e-01 | -0.227981 | 0.811671 | 0.674145 | -0.715056 | 0.99984 | 0.185209 |
| ERCC-00168 | -0.723045 | 0.693688 | 0.487767 | -0.300976 | 0.986871 | 0.732154 | 0.232214 | 0.999961 | 0.776989 | -1.026813 | ... | 0.444718 | 0.032827 | 0.964837 | 9.597500e-01 | -1.497910 | 0.329879 | 0.162756 | -2.296520 | 0.99984 | 0.060384 |
| ERCC-00170 | -0.708528 | 0.426421 | 0.213389 | -0.912857 | 0.986459 | 0.102280 | -0.104913 | 0.999961 | 0.646929 | -0.935368 | ... | 0.589275 | -0.009492 | 0.965538 | 9.607581e-01 | -0.245517 | 0.801773 | 0.660472 | -0.831967 | 0.99984 | 0.135771 |
| ERCC-00171 | -0.449763 | 0.634579 | 0.415045 | -0.853544 | 0.986459 | 0.110680 | -0.083967 | 0.940560 | 0.298420 | -0.633504 | ... | 0.942262 | 0.005329 | 0.953708 | 9.472072e-01 | -0.295838 | 0.754667 | 0.592621 | -0.676164 | 0.99984 | 0.208286 |
26672 rows × 69 columns
# Collect fold change, adjusted p-value and p-value of every experiment into
# one wide table (three columns per experiment) and save it as CSV.
# The row index is taken from the 'RNP_CEBPA' result table.
tosave = pd.DataFrame(index=results['RNP_CEBPA'].index)
for k, v in results.items():
    tosave[k + '_fc_log2'] = v['log2FoldChange']
    tosave[k + '_padj'] = v['padj']
    tosave[k + '_pval'] = v['pvalue']
tosave.to_csv('../results/RNPv2/deseq_RNP_all.csv')
# Add the extra genes to `ctf` only when they are missing, so that re-running
# this cell does not append them again and duplicate rows in the table below.
ctf.extend([g for g in ['IRF2BP2','MYBL2','IKZF1'] if g not in ctf])
# Rows = genes in `ctf`, columns = experiments. A cell holds the gene's
# log2 fold change in that experiment when its p-value is below 0.05, else 0.
# NOTE(review): the cut-off is applied to the raw `pvalue`, not to `padj` --
# confirm this is intended.
deseq = pd.DataFrame(index=ctf)
for k, val in results.items():
    deseq[k] = [i.log2FoldChange if i.pvalue<0.05 else 0 for a, i in val.loc[ctf].iterrows()]
deseq
| RNP_CEBPA | RNP_ELF2 | RNP_FLI1 | RNP_GFI1 | RNP_HOXA9 | RNP_IKZF1 | RNP_IRF2BP2 | RNP_IRF8 | RNP_LMO2 | RNP_LYL1 | ... | RNP_MEIS1 | RNP_MYB | RNP_MYBL2 | RNP_MYC | RNP_RUNX1 | RNP_RUNX2 | RNP_SP1 | RNP_SPI1 | RNP_ZEB2 | RNP_ZMYND8 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ARID2 | 0.340050 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.947945 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.695128 | 0.000000 | 0.00000 | -0.868870 | -1.234878 | 0.000000 | 0.000000 |
| CEBPA | 0.665869 | 0.000000 | 0 | -0.514618 | 0.000000 | 0.000000 | -1.459458 | 0.417750 | 0.000000 | 0.000000 | ... | 0.00000 | -1.024082 | 0 | -1.857561 | -0.301267 | 0.00000 | -0.920091 | -1.078669 | 0.000000 | 0.000000 |
| CEBPE | -1.171271 | 0.000000 | 0 | -1.175967 | 0.000000 | 0.000000 | -2.094375 | 1.783117 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.00000 | -1.109580 | -2.989194 | 0.000000 | 0.000000 |
| E2F3 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.932833 | 0.200048 | 0.000000 | 0.000000 | ... | 0.00000 | -1.143534 | 0 | -2.178933 | 0.000000 | 0.00000 | -0.827072 | -1.312344 | 0.000000 | 0.000000 |
| FLI1 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.519046 | 0.360083 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.487533 | 0.000000 | 0.00000 | -0.902816 | -2.198507 | -0.562026 | 0.000000 |
| FOSL2 | -2.904566 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.383493 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -2.238943 | 0 | -1.380069 | 0.000000 | 0.00000 | 0.000000 | -2.014765 | 0.000000 | 0.000000 |
| GFI1 | 0.000000 | 0.000000 | 0 | -0.821509 | 0.000000 | 0.000000 | -1.871571 | 0.459210 | 0.000000 | 0.000000 | ... | 0.00000 | -1.229774 | 0 | -1.410259 | 0.000000 | 0.00000 | -0.931032 | -1.207145 | 0.000000 | 0.000000 |
| GFI1B | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -3.412761 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| HHEX | -1.019358 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.924986 | 0.542010 | -0.463475 | -0.496866 | ... | 0.00000 | 0.000000 | 0 | -1.272680 | 0.000000 | 0.00000 | -1.098091 | -1.852349 | 0.524570 | 0.000000 |
| IRF8 | -1.838495 | 0.000000 | 0 | 0.653750 | 0.000000 | 0.000000 | -2.353826 | -0.718261 | 0.000000 | 0.000000 | ... | 0.00000 | -1.468729 | 0 | -1.964017 | 0.000000 | 0.00000 | 0.000000 | -2.057056 | 0.000000 | 0.000000 |
| LYL1 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.810271 | 0.000000 | 0.000000 | -0.837499 | ... | 0.00000 | -1.408875 | 0 | -2.145511 | 0.818892 | 0.00000 | -1.082395 | -2.897184 | 0.000000 | 0.000000 |
| MEF2C | 0.866923 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.529377 | 0.000000 | -0.548640 | -0.630461 | ... | 0.00000 | 0.000000 | 0 | -1.918495 | 0.000000 | 0.00000 | 0.000000 | -1.700600 | 0.000000 | 0.000000 |
| MEF2D | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -2.103142 | -1.102758 | 0.000000 | 0.000000 | ... | 0.00000 | -2.488134 | 0 | -2.010969 | 0.000000 | 0.00000 | 0.000000 | -2.236388 | 0.000000 | 0.000000 |
| MEIS1 | 0.406437 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.737788 | 0.000000 | 0.000000 | 0.000000 | ... | -1.64894 | 0.000000 | 0 | -1.568498 | 0.000000 | 0.00000 | -0.791120 | -1.337573 | 0.000000 | 0.000000 |
| MTF1 | -0.359724 | 0.000000 | 0 | -0.234824 | 0.000000 | 0.000000 | -1.720132 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.463445 | 0.000000 | 0.00000 | -0.821132 | -1.529028 | 0.000000 | 0.000000 |
| MYB | -0.305394 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -2.112967 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.829395 | 0.000000 | 0.00000 | -0.857136 | -1.936176 | -0.374210 | 0.000000 |
| MYC | -0.866361 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.711034 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -1.252326 | 0 | -1.371390 | 0.000000 | 0.00000 | -0.959778 | -1.670780 | -0.738478 | 0.000000 |
| PLAGL2 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -2.263852 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -1.581096 | 0 | -2.051150 | 0.000000 | 0.00000 | -0.853702 | -1.663457 | 0.000000 | 0.000000 |
| RUNX1 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.940890 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.458095 | -0.481340 | 0.00000 | -0.705648 | -1.203356 | 0.282527 | 0.000000 |
| RUNX2 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.356702 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -2.025772 | 0.000000 | -1.77579 | 0.000000 | 0.000000 | 0.469164 | 0.000000 |
| RXRA | -0.335731 | 0.000000 | 0 | -0.412112 | 0.000000 | 0.214228 | -1.580221 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.692986 | 0.000000 | 0.00000 | -0.839900 | -1.556593 | 0.286307 | 0.000000 |
| SETDB1 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.718604 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.996760 | 0.000000 | 0.00000 | -0.801147 | -1.298399 | -0.225842 | 0.000000 |
| SNAPC5 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.386950 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -0.971078 | 0 | -2.138556 | 0.000000 | 0.00000 | -0.895299 | -1.633219 | 0.000000 | 0.000000 |
| SP1 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.835315 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -0.918558 | 0 | -1.918205 | 0.000000 | 0.00000 | -1.403918 | -1.414893 | 0.000000 | 0.000000 |
| SPI1 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.479062 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.568381 | 0.000000 | 0.00000 | -0.899333 | -2.785086 | 0.000000 | 0.000000 |
| SREBF1 | 0.826931 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -2.030056 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -1.056367 | 0 | -2.835746 | 0.000000 | 0.00000 | -1.176922 | -1.621751 | 0.537165 | 0.000000 |
| STAT5B | -0.282620 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.733956 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.970013 | 0.000000 | 0.00000 | -0.838704 | -1.016132 | -0.250087 | 0.000000 |
| TERF2 | 0.000000 | 0.000000 | 0 | 0.118994 | 0.000000 | 0.000000 | -1.631483 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -2.271328 | 0.000000 | 0.00000 | -0.799337 | -1.392899 | -0.128301 | 0.000000 |
| TFAP4 | -0.481024 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.363109 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -3.278864 | 0.000000 | 0.00000 | -0.898665 | -1.620129 | 0.000000 | 0.000000 |
| ZEB2 | -0.269948 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.204938 | -1.711839 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -0.914863 | 0 | -1.681862 | 0.000000 | 0.00000 | -0.758267 | -1.224329 | -0.384563 | 0.000000 |
| ZFPM1 | 1.281751 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.780308 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.613582 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| ZMYND8 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.625913 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.954080 | 0.000000 | 0.00000 | -0.864796 | -1.163031 | 0.000000 | -0.169019 |
| LMO2 | 0.347173 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -2.072513 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -1.648474 | 0 | -1.613718 | 0.000000 | 0.00000 | -0.782027 | -1.272213 | 0.000000 | 0.000000 |
| MAX | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.505584 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -0.933559 | 0 | -1.891442 | 0.000000 | 0.00000 | 0.000000 | -1.352632 | 0.000000 | 0.000000 |
| ELF2 | 0.000000 | -0.773196 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.346107 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.701935 | 0.000000 | 0.00000 | -0.780536 | -1.283684 | 0.000000 | 0.000000 |
| ETV6 | 0.249116 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -1.279559 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -1.340763 | 0 | -1.770362 | 0.000000 | 0.00000 | -0.892123 | -1.613257 | 0.000000 | 0.000000 |
| HOXA9 | 0.000000 | 0.000000 | 0 | 0.000000 | -0.683747 | -0.246604 | -1.773468 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.720200 | 0.000000 | 0.00000 | -0.708210 | -1.199064 | 0.495589 | 0.000000 |
| GATA2 | -3.803775 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -3.559850 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -3.539672 | -2.493064 | 0.00000 | 0.000000 | 1.276900 | 0.000000 | -1.531774 |
| IRF2BP2 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -2.578022 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -0.991376 | 0 | -1.246974 | 0.000000 | 0.00000 | 0.000000 | -1.414520 | 0.000000 | 0.000000 |
| MYBL2 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | 0.000000 | -2.082278 | 0.000000 | 0.000000 | 0.000000 | ... | 0.00000 | -1.224332 | 0 | -2.740802 | 0.000000 | 0.00000 | -0.902065 | -1.787022 | 0.000000 | 0.000000 |
| IKZF1 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.000000 | -0.375969 | -1.812064 | -0.418599 | 0.000000 | 0.000000 | ... | 0.00000 | 0.000000 | 0 | -1.668116 | 0.000000 | 0.00000 | 0.000000 | -2.168195 | 0.000000 | 0.000000 |
41 rows × 23 columns
# Clustered heatmap of the fold-change matrix, colour scale clipped to [-1, 1].
fig = sns.clustermap(
    data=deseq,
    figsize=(25, 20),
    vmin=-1,
    vmax=1,
    xticklabels=deseq.columns,
    yticklabels=deseq.index,
)
fig.savefig('../results/RNPv2/clustermap_ctf_deseq.pdf')
# Keep only the part after the first '_' of each column name
# (e.g. 'RNP_CEBPA' -> 'CEBPA') so column labels match the row labels.
deseq.columns = [name.split('_')[1] for name in deseq.columns]
# Restrict (and reorder) the rows to the column labels, giving a square matrix.
deseq = deseq.loc[deseq.columns]
deseq.to_csv('../results/RNPv2/deseq_CTFmat.csv')
# Load the square fold-change matrix from disk.
deseq = pd.read_csv('../results/RNPv2/deseq_CTFmat.csv', index_col=0)
# Boolean adjacency: an edge wherever the fold change is below -0.8 or above
# 0.4. The matrix is transposed first, so edges run from a column label of
# `deseq` to a row label of `deseq`.
net = nx.from_pandas_adjacency(
    ((deseq < -0.8) | (deseq > 0.4)).T,
    create_using=nx.DiGraph,
)
pos = nx.nx_agraph.graphviz_layout(net, prog="neato")
# Red when the underlying fold change is positive, blue otherwise.
colors = [
    'red' if deseq.loc[target, source] > 0 else 'blue'
    for source, target in net.edges
]
plt.figure(figsize=(8, 8))
nx.draw(net, pos, with_labels=True, edge_color=colors)
plt.show()
# Zero every fold change strictly between -0.8 and 0.3; the graph below is
# built from the remaining non-zero entries.
# NOTE(review): the unweighted graph built from this matrix uses 0.4 as its
# upper cutoff while this one uses 0.3 -- confirm the difference is intended.
deseq[(deseq > -0.8) & (deseq < 0.3)] = 0
# Transposed, so edges run from a column label of `deseq` to a row label.
net = nx.from_pandas_adjacency(deseq.T, create_using=nx.DiGraph)
pos = nx.nx_agraph.graphviz_layout(net, prog='dot')
# Negated fold change of every edge.
colors = [-deseq.loc[i[1], i[0]] for i in net.edges]
# Scale negatives by |min| and positives by max so all values lie in [-1, 1].
# The extremes are computed once instead of once per edge; an empty edge list
# is left untouched.
if colors:
    lowest, highest = min(colors), max(colors)
    colors = [c / -lowest if c < 0 else c / highest for c in colors]
plt.figure(figsize=(8, 8))
nx.draw(net, pos, with_labels=True, edge_color=colors, edge_cmap=plt.cm.RdYlBu)
plt.show()
# Integer colour index per distinct third '-'-separated field of the column
# names (e.g. 'RNP_MYC' in 'mr129-MV411-RNP_MYC-r4'). The last column of
# `data` is excluded throughout.
col = {v: i for i, v in enumerate(set([i.split('-')[2] for i in data.columns[:-1]]))}
# 2-component PCA of the samples (columns are transposed into rows).
red = PCA(2).fit_transform(data[data.columns[:-1]].T)
h.scatter(red, labels=data.columns[:-1], radi=60000,
          colors=[col[i.split('-')[2]] for i in data.columns[:-1]])
# Reduce to 30 principal components, then embed in 2-D with t-SNE.
red = PCA(30).fit_transform(data[data.columns[:-1]].T)
# `perplexity` is keyword-only in current scikit-learn releases; passing it
# positionally raises TypeError there, so both arguments are named.
red = TSNE(n_components=2, perplexity=4).fit_transform(red)
Note: sample mr129-MV411-RNP_MYC-r4 looks like an outlier.
# Scatter the current 2-D embedding, coloured by the third '-'-separated
# field of each column name.
h.scatter(
    red,
    labels=data.columns[:-1],
    radi=70,
    colors=[col[name.split('-')[2]] for name in data.columns[:-1]],
)
# 20-component PCA of the samples; kept in `pca` so the variance ratios can
# be inspected.
pca = PCA(20)
red = pca.fit_transform(data[data.columns[:-1]].T)
# Bare expression: displays the fraction of variance explained per component.
pca.explained_variance_ratio_
array([0.50756446, 0.23809133, 0.07047153, 0.0639693 , 0.03091387,
0.02514611, 0.01419286, 0.01084615, 0.00979492, 0.00555606,
0.00471327, 0.00311185, 0.00237603, 0.00216884, 0.00169693,
0.00132933, 0.0012026 , 0.00105288, 0.00081574, 0.00060336])
# Bare expression: displays the `data` frame when run as a notebook cell.
data
| mr120-MV411-RNP_IRF2BP2-r4 | mr129-MV411-RNP_MYC-r4 | mr130-MV411-RNP_MYC-r5 | mr131-MV411-RNP_MYC-r6 | mr132-MV411-RNP_RUNX1-r4 | mr133-MV411-RNP_RUNX1-r5 | mr134-MV411-RNP_RUNX1-r6 | mr135-MV411-RNP_RUNX2-r4 | mr136-MV411-RNP_RUNX2-r5 | mr137-MV411-RNP_RUNX2-r6 | ... | mr182-MV411-RNP_MYBL2-r3 | mr183-MV411-RNP_HOXA9-r4 | mr184-MV411-RNP_HOXA9-r5 | mr185-MV411-RNP_HOXA9-r6 | mr186-MV411-RNP_AAVS1-r1 | mr187-MV411-RNP_AAVS1-r2 | mr188-MV411-RNP_AAVS1-r3 | mr126-MV411-RNP_MEF2D-r4 | mr127-MV411-RNP_MEF2D-r5 | mr128-MV411-RNP_MEF2D-r6 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gene_id | |||||||||||||||||||||
| TSPAN6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| DPM1 | 1619.00 | 2465.00 | 1701.00 | 1535.00 | 1863.00 | 2093.00 | 2027.00 | 2202.00 | 2148.00 | 2235.00 | ... | 3272.00 | 3686.0 | 3990.0 | 4714.0 | 1620.00 | 1840.00 | 1729.00 | 1983.00 | 2451.00 | 2378.00 |
| SCYL3 | 464.57 | 846.12 | 672.69 | 603.75 | 577.41 | 617.97 | 601.43 | 545.49 | 575.14 | 536.97 | ... | 961.52 | 1024.2 | 1155.4 | 1316.6 | 430.78 | 460.04 | 437.36 | 542.42 | 670.02 | 576.38 |
| C1orf112 | 780.43 | 1031.90 | 755.31 | 676.25 | 1232.70 | 1209.00 | 1309.60 | 1370.50 | 1245.90 | 1257.10 | ... | 1647.50 | 2260.8 | 2422.6 | 2757.4 | 949.22 | 1277.00 | 1032.60 | 1163.60 | 1481.00 | 1332.90 |
| FGR | 1443.00 | 8556.00 | 6387.00 | 5955.00 | 2359.00 | 2615.00 | 2258.00 | 3340.00 | 3229.00 | 3466.00 | ... | 4120.00 | 4514.0 | 4748.0 | 5478.0 | 2323.00 | 2401.00 | 2230.00 | 3680.00 | 4706.00 | 4308.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| BMP8B-AS1 | 3.00 | 2.00 | 2.00 | 4.00 | 10.00 | 9.00 | 9.00 | 8.00 | 4.00 | 7.00 | ... | 9.00 | 12.0 | 12.0 | 18.0 | 6.00 | 5.00 | 4.00 | 3.00 | 3.00 | 7.00 |
| H2AL1SP | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| NIPBL-DT | 462.00 | 650.00 | 478.00 | 431.00 | 777.00 | 829.00 | 782.00 | 709.00 | 743.00 | 776.00 | ... | 1120.00 | 1375.0 | 1594.0 | 1686.0 | 497.00 | 653.00 | 673.00 | 889.00 | 1099.00 | 1024.00 |
| CERNA2 | 2.00 | 7.00 | 8.00 | 3.00 | 13.00 | 6.00 | 24.00 | 9.00 | 8.00 | 12.00 | ... | 18.00 | 22.0 | 29.0 | 31.0 | 4.00 | 10.00 | 10.00 | 3.00 | 1.00 | 7.06 |
| LINC02689 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
26574 rows × 69 columns
# Start from an empty result store; the GSEA loop fills it keyed by
# experiment name.
res = {}
# Bare expression: displays the list of experiment names.
experiments
['RNP_SPI1', 'RNP_RUNX2', 'RNP_GFI1', 'RNP_IRF2BP2', 'RNP_MYC', 'RNP_LMO2', 'RNP_IKZF1', 'RNP_MYBL2', 'RNP_MEIS1', 'RNP_IRF8', 'RNP_ELF2', 'RNP_SP1', 'RNP_LYL1', 'RNP_CEBPA', 'RNP_ZEB2', 'RNP_MEF2D', 'RNP_ZMYND8', 'RNP_RUNX1', 'RNP_FLI1', 'RNP_HOXA9', 'RNP_MYB', 'RNP_MAX', 'RNP_MEF2C']
# Bare expression: displays the current contents of `res`.
res
{'SPI1': (-1.2741653425093569, 0.2595628445427471),
'HOXA9': (0.41862648305962474, 0.14490862380188851),
'MYC': (-1.430576835252246, 0.10549660323839703),
'GFI1': (0.061434499699685764, 0.11065088877815657),
'ELF2': (0.18835876643089494, 0.10013191844645487),
'IRF2BP2': (-1.2421436514123199, 0.2116786922337),
'RUNX2': (-0.21771114300468575, 0.12354032980074721),
'CEBPA': (0.04728101063315868, 0.22445160295741662),
'SP1': (-0.9074679568707595, 0.26590656079563213),
'IKZF1': (0.26488156665796003, 0.11936483909099824),
'ZMYND8': (0.05464554271508272, 0.1512365231509835),
'ZEB2': (-0.1701758517854591, 0.1445402147201962),
'MEF2D': (-0.08897607523943744, 0.12817467579731256),
'MYBL2': (0.4145378723566837, 0.10998698893732116),
'LMO2': (0.2066036480588095, 0.10530622574043316),
'MAX': (-0.6875484167700773, 0.11182951672314183),
'RUNX1': (0.013627339651964025, 0.1356890688647267),
'MEIS1': (0.20823559991440868, 0.16460447494728012),
'MYB': (-0.5666496866194601, 0.16455438308564643),
'LYL1': (0.11547676609947306, 0.09762554626023551),
'MEF2C': (0.21633221486591706, 0.16009568270385865),
'IRF8': (0.08350447764203282, 0.11069908626789565),
'FLI1': (0.2669788365781275, 0.08613995212995244)}
# Run GSEA (gene_sets='WikiPathways_2013') for each experiment against the
# columns whose name contains 'AAVS1', and bar-plot the first 25 result rows.
for val in experiments:
    print(val)
    # Columns of this experiment plus the 'AAVS1' columns. The .copy() makes
    # the frame independent, so the in-place rescaling below no longer raises
    # pandas' SettingWithCopyWarning.
    totest = data[[v for v in data.columns[:-1] if val+'-' in v or 'AAVS1' in v]].copy()
    cls = ['Condition' if val+'-' in v else 'DMSO' for v in totest.columns]
    # presumably scaling[name] is a (shift, spread) pair -- TODO confirm
    scale_stats = scaling[val.split('_')[1]]
    if abs(scale_stats[0]) > 3*scale_stats[1]:
        print("rescaling this one")
        cols = [c for c in totest.columns if val+'-' in c]
        totest[cols] = totest[cols]*(2**scale_stats[0])
    elif val in res:
        # Skip only when a result is already stored (same rule as the GO
        # loops). Experiments that need no rescaling must still be analysed
        # once, because the plotting loop indexes res[val] for every
        # experiment.
        continue
    res[val] = gseapy.gsea(data=totest, gene_sets='WikiPathways_2013',
                           cls=cls, no_plot=False, processes=8)
    res[val].res2d['Term'] = list(res[val].res2d.index)
    # Fresh figure per experiment so successive barplots do not share axes.
    plt.figure()
    # NOTE(review): `hue_order` is given a column name but no `hue` is set --
    # confirm what was intended.
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_MYB rescaling this one
/home/jeremie/.local/lib/python3.8/site-packages/pandas/core/frame.py:2963: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self[k1] = value[k2]
# Persist the GSEA results to disk ...
with open('../data/pathways/wikipathway_RNPv2', 'wb') as f:
    pickle.Pickler(f).dump(res)
# ... and read them back.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this notebook.
with open('../data/pathways/wikipathway_RNPv2', 'rb') as f:
    res = pickle.Unpickler(f).load()
import matplotlib.pyplot as plt
%matplotlib inline
# One barplot per experiment of the first 25 result rows (x = "es").
for val in experiments:
    # Shorten each term: drop its first three characters and everything from
    # the next 'WP' onwards.
    res[val].res2d['Term'] = [
        term[3:].split('WP')[0] for term in res[val].res2d['Term']
    ]
    ax = sns.barplot(
        data=res[val].res2d.iloc[:25],
        x="es",
        y="Term",
        hue_order="geneset_size",
    )
    ax.set_title(val)
    plt.show()
# Collect every term (res2d index entry) seen in any experiment.
a = set()
for k, val in res.items():
    a.update(set(val.res2d.index))
# One zero-filled list per term, with one slot per experiment.
a = {term: [0]*len(res) for term in a}
# Write each experiment's "es" value into its slot; missing terms stay 0.
for n, (k, val) in enumerate(res.items()):
    for i, v in val.res2d.iterrows():
        a[i][n] = v.es
# Rows = experiments, columns = terms.
res = pd.DataFrame(a, index=res.keys())
# Shorten term names (drop the first three characters and everything from
# 'WP' onwards) and keep only the part after '_' of each experiment name.
res.columns = [name[3:].split('WP')[0] for name in res.columns]
res.index = [name.split('_')[1] for name in res.index]
fig = sns.clustermap(
    data=res,
    figsize=(25, 20),
    vmin=-1,
    vmax=1,
    xticklabels=res.columns,
    yticklabels=res.index,
)
res.to_csv('../results/RNPv2/wikipathway_gsea.csv')
fig.savefig("../results/RNPv2/enriched_terms_scaled_gsea.pdf")
# GSEA (gene_sets='GO_Biological_Process_2015') for RNP_MYB only, starting
# from an empty result store.
res = {}
for i, val in enumerate(['RNP_MYB']):
    print(val)
    # Columns of this experiment plus the 'AAVS1' columns. The .copy() makes
    # the frame independent, so the in-place rescaling below no longer raises
    # pandas' SettingWithCopyWarning.
    totest = data[[v for v in data.columns[:-1] if val+'-' in v or 'AAVS1' in v]].copy()
    cls = ['Condition' if val+'-' in v else 'DMSO' for v in totest.columns]
    # presumably scaling[name] is a (shift, spread) pair -- TODO confirm
    scale_stats = scaling[val.split('_')[1]]
    if abs(scale_stats[0]) > 3*scale_stats[1]:
        print("rescaling this one")
        cols = [c for c in totest.columns if val+'-' in c]
        totest[cols] = totest[cols]*(2**scale_stats[0])
    elif val in res:
        # Already computed and no rescaling needed: keep the stored result.
        continue
    res[val] = gseapy.gsea(data=totest, gene_sets='GO_Biological_Process_2015',
                           cls=cls, no_plot=False, processes=8)
    res[val].res2d['Term'] = list(res[val].res2d.index)
    # One figure per experiment, numbered by loop position.
    plt.figure(i)
    # NOTE(review): `hue_order` is given a column name but no `hue` is set --
    # confirm what was intended.
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_MYB rescaling this one
/home/jeremie/.local/lib/python3.8/site-packages/pandas/core/frame.py:2963: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self[k1] = value[k2]
# GSEA (gene_sets='GO_Biological_Process_2015') for every experiment against
# the columns whose name contains 'AAVS1'.
for i, val in enumerate(experiments):
    print(val)
    # Columns of this experiment plus the 'AAVS1' columns. The .copy() makes
    # the frame independent, so the in-place rescaling below no longer raises
    # pandas' SettingWithCopyWarning.
    totest = data[[v for v in data.columns[:-1] if val+'-' in v or 'AAVS1' in v]].copy()
    cls = ['Condition' if val+'-' in v else 'DMSO' for v in totest.columns]
    # presumably scaling[name] is a (shift, spread) pair -- TODO confirm
    scale_stats = scaling[val.split('_')[1]]
    if abs(scale_stats[0]) > 3*scale_stats[1]:
        print("rescaling this one")
        cols = [c for c in totest.columns if val+'-' in c]
        totest[cols] = totest[cols]*(2**scale_stats[0])
    elif val in res:
        # Already computed and no rescaling needed: keep the stored result.
        continue
    res[val] = gseapy.gsea(data=totest, gene_sets='GO_Biological_Process_2015',
                           cls=cls, no_plot=False, processes=8)
    res[val].res2d['Term'] = list(res[val].res2d.index)
    # One figure per experiment, numbered by loop position.
    plt.figure(i)
    # NOTE(review): `hue_order` is given a column name but no `hue` is set --
    # confirm what was intended.
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_SPI1 rescaling this one RNP_HOXA9 RNP_MYC rescaling this one RNP_GFI1 RNP_ELF2 RNP_IRF2BP2 rescaling this one RNP_RUNX2 RNP_CEBPA RNP_SP1 rescaling this one RNP_IKZF1 RNP_ZMYND8 RNP_ZEB2 RNP_MEF2D RNP_MYBL2 rescaling this one RNP_LMO2 RNP_MAX rescaling this one RNP_RUNX1 RNP_MEIS1 RNP_MYB rescaling this one RNP_LYL1 RNP_MEF2C RNP_IRF8 RNP_FLI1 rescaling this one
/home/jeremie/.local/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
# Persist the GO GSEA results to disk ...
with open('../data/pathways/GO_Biological_Process_2015_RNPv2', 'wb') as f:
    pickle.Pickler(f).dump(res)
# ... and read them back.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this notebook.
with open('../data/pathways/GO_Biological_Process_2015_RNPv2', 'rb') as f:
    res = pickle.Unpickler(f).load()
# Keep only the text before '(GO' in every term name.
for i, v in res.items():
    v.res2d['Term'] = [term.split('(GO')[0] for term in v.res2d['Term']]
creating matrices
# Collect every term name seen in any experiment.
a = set()
for k, val in res.items():
    a.update(set(val.res2d.Term))
# One zero-filled list per term, with one slot per experiment.
a = {term: [0]*len(res) for term in a}
# Write each experiment's "es" value into its slot; missing terms stay 0.
for n, (k, val) in enumerate(res.items()):
    for i, v in val.res2d.iterrows():
        a[v.Term][n] = v.es
# Rows = experiments, columns = terms.
res = pd.DataFrame(a, index=res.keys())
fig = sns.clustermap(
    data=res,
    figsize=(25, 20),
    vmin=-1,
    vmax=1,
    yticklabels=res.index,
)
/home/jeremie/.local/lib/python3.8/site-packages/seaborn/matrix.py:649: UserWarning: Clustering large matrix with scipy. Installing `fastcluster` may give better performance. warnings.warn(msg)
# NOTE(review): this is the same output path the WikiPathways clustermap is
# saved to, so that figure gets overwritten -- confirm whether a distinct
# file name is wanted.
fig.savefig("../results/RNPv2/enriched_terms_scaled_gsea.pdf")
# Cluster the rows of `res` with default DBSCAN settings.
model = DBSCAN()
labels = model.fit_predict(res)
ii = itertools.count(res.shape[0])
# DBSCAN exposes no `children_` merge tree (that attribute belongs to
# AgglomerativeClustering), so fall back to an empty tree instead of raising
# AttributeError.
tree = [{'node_id': next(ii), 'left': x[0], 'right': x[1]}
        for x in getattr(model, 'children_', ())]
sort = labels.argsort()
# Hard-coded row order; replaces the label-based ordering computed just above.
sort = np.array([0, 2, 3, 6, 9, 12, 1, 7, 11, 16, 10, 14, 19, 4, 5, 8, 13, 15, 17, 18, 20, 21, 22])
# Clustered heatmap of the pairwise correlation between the rows of `res`
# (transposed so that .corr() compares rows rather than columns).
sns.clustermap(res.T.corr())
<seaborn.matrix.ClusterGrid at 0x7f89bb388a60>
# Correlation-matrix plot of the rows of `res`, reordered by `sort` and
# labelled with the matching index entries.
a = h.plotCorrelationMatrix(res.values[sort], res.index[sort].tolist(), interactive=True, title="RNP2_bioproc_corr")#,colors=[labels[i] for i in sort])
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 529), ('colors', 529), ('data', 23), ('xname', 529), ('yname', 529)
# 2-component PCA of the rows of `res`, coloured by cluster label.
red = PCA(2).fit_transform(res)
h.scatter(red, labels=res.index, radi=1, colors=labels)
# `perplexity` is keyword-only in current scikit-learn releases; passing it
# positionally raises TypeError there, so both arguments are named.
red = TSNE(n_components=2, perplexity=2).fit_transform(res)
h.scatter(red, labels=res.index, radi=9, colors=labels)
# Write the enrichment matrix to CSV and reload it with the first column as
# the index.
res.to_csv('../results/RNPv2/biopathway_gsea.csv')
res = pd.read_csv('../results/RNPv2/biopathway_gsea.csv', index_col=0)
# Rebuild `data` as a matrix of log2 fold changes: rows follow the index of
# results['RNP_SP1'], one column per entry of `results`.
# NOTE: this rebinds `data`, which referred to a different frame before.
data = pd.DataFrame(index=results['RNP_SP1'].index.tolist())
for i, v in results.items():
    data[i] = v.log2FoldChange
# Average-linkage agglomerative clustering of the rows of `res` into 8
# clusters, using cosine distance.
# NOTE(review): newer scikit-learn releases rename `affinity` to `metric`;
# revisit when upgrading.
model = AgglomerativeClustering(
    n_clusters=8,
    linkage="average",
    affinity="cosine",
    compute_full_tree=True,
)
labels = model.fit_predict(res)
# Internal node ids continue counting after the number of rows in `res`.
ii = itertools.count(res.shape[0])
tree = [
    {'node_id': node_id, 'left': pair[0], 'right': pair[1]}
    for pair, node_id in zip(model.children_, ii)
]
# Order that groups rows sharing a cluster label together.
sort = labels.argsort()
# presumably the columns of `data` are in the same order as the rows of
# `res`, since `sort` comes from `res` -- TODO confirm
a = h.plotCorrelationMatrix(
    data.values.T[sort],
    data.columns[sort].tolist(),
    interactive=True,
    title="transcriptome correlation",
)  # ,colors=[labels[i] for i in sort])
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 529), ('colors', 529), ('data', 23), ('xname', 529), ('yname', 529)
/home/jeremie/.local/lib/python3.8/site-packages/bokeh/io/saving.py:125: UserWarning: save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN
warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
/home/jeremie/.local/lib/python3.8/site-packages/bokeh/io/saving.py:138: UserWarning: save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'
warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")
## Filtered version (log2 fold change set to 0 for genes with p-value > 0.01)
# Same log2 fold-change matrix, but with every gene whose p-value exceeds
# 0.01 set to 0.
data = pd.DataFrame(index=results['RNP_SP1'].index.tolist())
for i, v in results.items():
    # The filter used to be written with `==`, a comparison whose result was
    # discarded, so nothing was zeroed. `mask` applies it without modifying
    # the tables in `results`; rows with a missing p-value are left unchanged.
    data[i] = v.log2FoldChange.mask(v.pvalue > 0.01, 0)
a = h.plotCorrelationMatrix(data.values.T[sort], data.columns[sort].tolist(),
                            interactive=True, title="transcriptome correlation")
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 529), ('colors', 529), ('data', 23), ('xname', 529), ('yname', 529)
# Clustered heatmap of the pairwise correlation between the columns of `data`.
sns.clustermap(data.corr())
<seaborn.matrix.ClusterGrid at 0x7f89b87cca30>
# Bare expression: displays the column-by-column correlation matrix of `data`.
data.corr()
| RNP_CEBPA | RNP_ELF2 | RNP_FLI1 | RNP_GFI1 | RNP_HOXA9 | RNP_IKZF1 | RNP_IRF2BP2 | RNP_IRF8 | RNP_LMO2 | RNP_LYL1 | ... | RNP_MEIS1 | RNP_MYB | RNP_MYBL2 | RNP_MYC | RNP_RUNX1 | RNP_RUNX2 | RNP_SP1 | RNP_SPI1 | RNP_ZEB2 | RNP_ZMYND8 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| RNP_CEBPA | 1.000000 | -0.008537 | 0.048766 | 0.107902 | -0.037238 | 0.016077 | 0.158899 | -0.049794 | 0.072230 | 0.105927 | ... | -0.067496 | 0.101652 | -0.002078 | -0.030426 | -0.033040 | -0.077050 | -0.062460 | -0.022006 | 0.066437 | -0.056477 |
| RNP_ELF2 | -0.008537 | 1.000000 | 0.215359 | 0.141933 | 0.222553 | 0.183503 | -0.038176 | 0.110852 | 0.166513 | 0.197812 | ... | 0.184303 | -0.155011 | 0.137739 | -0.091962 | 0.231626 | 0.186592 | 0.097690 | -0.062440 | -0.010590 | 0.182120 |
| RNP_FLI1 | 0.048766 | 0.215359 | 1.000000 | 0.137178 | 0.173498 | 0.080616 | -0.014686 | 0.105415 | 0.167749 | 0.203901 | ... | 0.138742 | -0.160537 | 0.103584 | -0.069645 | 0.137458 | 0.123273 | 0.081545 | 0.015253 | 0.016509 | 0.137007 |
| RNP_GFI1 | 0.107902 | 0.141933 | 0.137178 | 1.000000 | 0.157041 | 0.093638 | 0.058113 | -0.016914 | 0.138362 | 0.141911 | ... | 0.118756 | 0.021109 | 0.076480 | -0.160067 | 0.084592 | 0.071401 | 0.026020 | -0.127463 | -0.011549 | 0.009134 |
| RNP_HOXA9 | -0.037238 | 0.222553 | 0.173498 | 0.157041 | 1.000000 | 0.213662 | -0.008793 | 0.170174 | 0.157017 | 0.170661 | ... | 0.267093 | -0.093898 | 0.085874 | -0.131686 | 0.112273 | 0.108828 | 0.032246 | -0.104074 | -0.034953 | 0.146966 |
| RNP_IKZF1 | 0.016077 | 0.183503 | 0.080616 | 0.093638 | 0.213662 | 1.000000 | -0.053482 | 0.101399 | 0.043702 | 0.069725 | ... | 0.162034 | -0.067345 | 0.065476 | -0.075494 | 0.074574 | 0.089289 | 0.002667 | -0.097497 | -0.092110 | 0.111766 |
| RNP_IRF2BP2 | 0.158899 | -0.038176 | -0.014686 | 0.058113 | -0.008793 | -0.053482 | 1.000000 | 0.056821 | -0.014555 | -0.040366 | ... | 0.011694 | 0.242713 | -0.042318 | 0.270860 | -0.039152 | -0.021313 | 0.038669 | -0.007286 | -0.024535 | -0.082777 |
| RNP_IRF8 | -0.049794 | 0.110852 | 0.105415 | -0.016914 | 0.170174 | 0.101399 | 0.056821 | 1.000000 | 0.097484 | 0.082613 | ... | 0.136082 | -0.158932 | 0.012490 | -0.040294 | 0.014059 | 0.106629 | 0.044310 | 0.117184 | -0.014147 | 0.082807 |
| RNP_LMO2 | 0.072230 | 0.166513 | 0.167749 | 0.138362 | 0.157017 | 0.043702 | -0.014555 | 0.097484 | 1.000000 | 0.251558 | ... | 0.156986 | -0.147163 | 0.094381 | -0.136840 | 0.108782 | 0.098091 | 0.045805 | -0.037086 | 0.032999 | 0.126139 |
| RNP_LYL1 | 0.105927 | 0.197812 | 0.203901 | 0.141911 | 0.170661 | 0.069725 | -0.040366 | 0.082613 | 0.251558 | 1.000000 | ... | 0.136141 | -0.151312 | 0.118542 | -0.088426 | 0.162914 | 0.092221 | 0.067900 | -0.031155 | 0.045789 | 0.134117 |
| RNP_MAX | -0.051815 | 0.075814 | 0.091799 | 0.038743 | 0.081930 | 0.009678 | 0.102931 | 0.059595 | 0.073636 | 0.080743 | ... | 0.092537 | -0.063377 | 0.033817 | 0.194471 | 0.058472 | 0.035712 | 0.099445 | 0.004189 | -0.046842 | 0.081041 |
| RNP_MEF2C | -0.019360 | 0.204392 | 0.153736 | 0.124681 | 0.206500 | 0.148360 | -0.009866 | 0.149248 | 0.131753 | 0.147364 | ... | 0.179286 | -0.125295 | 0.107636 | -0.066348 | 0.127374 | 0.128158 | 0.033909 | -0.074747 | -0.006860 | 0.157437 |
| RNP_MEF2D | -0.134318 | 0.163622 | 0.097779 | -0.047653 | 0.120642 | 0.072161 | 0.036998 | 0.244304 | 0.031683 | 0.049696 | ... | 0.116311 | -0.115348 | 0.038506 | 0.047564 | 0.108192 | 0.147687 | 0.078911 | 0.042397 | -0.037240 | 0.107919 |
| RNP_MEIS1 | -0.067496 | 0.184303 | 0.138742 | 0.118756 | 0.267093 | 0.162034 | 0.011694 | 0.136082 | 0.156986 | 0.136141 | ... | 1.000000 | -0.102240 | 0.071364 | -0.068855 | 0.101477 | 0.099244 | 0.054090 | -0.097801 | -0.061431 | 0.123617 |
| RNP_MYB | 0.101652 | -0.155011 | -0.160537 | 0.021109 | -0.093898 | -0.067345 | 0.242713 | -0.158932 | -0.147163 | -0.151312 | ... | -0.102240 | 1.000000 | -0.093594 | 0.133327 | -0.156531 | -0.138671 | -0.090538 | -0.067535 | -0.089074 | -0.193815 |
| RNP_MYBL2 | -0.002078 | 0.137739 | 0.103584 | 0.076480 | 0.085874 | 0.065476 | -0.042318 | 0.012490 | 0.094381 | 0.118542 | ... | 0.071364 | -0.093594 | 1.000000 | -0.053873 | 0.110655 | 0.089374 | 0.006467 | -0.082938 | -0.034789 | 0.113884 |
| RNP_MYC | -0.030426 | -0.091962 | -0.069645 | -0.160067 | -0.131686 | -0.075494 | 0.270860 | -0.040294 | -0.136840 | -0.088426 | ... | -0.068855 | 0.133327 | -0.053873 | 1.000000 | -0.063258 | -0.085610 | 0.025459 | 0.020084 | -0.013748 | 0.015183 |
| RNP_RUNX1 | -0.033040 | 0.231626 | 0.137458 | 0.084592 | 0.112273 | 0.074574 | -0.039152 | 0.014059 | 0.108782 | 0.162914 | ... | 0.101477 | -0.156531 | 0.110655 | -0.063258 | 1.000000 | 0.212036 | 0.061517 | -0.055115 | -0.060962 | 0.163928 |
| RNP_RUNX2 | -0.077050 | 0.186592 | 0.123273 | 0.071401 | 0.108828 | 0.089289 | -0.021313 | 0.106629 | 0.098091 | 0.092221 | ... | 0.099244 | -0.138671 | 0.089374 | -0.085610 | 0.212036 | 1.000000 | 0.066548 | -0.027437 | -0.045105 | 0.131409 |
| RNP_SP1 | -0.062460 | 0.097690 | 0.081545 | 0.026020 | 0.032246 | 0.002667 | 0.038669 | 0.044310 | 0.045805 | 0.067900 | ... | 0.054090 | -0.090538 | 0.006467 | 0.025459 | 0.061517 | 0.066548 | 1.000000 | 0.194901 | -0.067507 | 0.018437 |
| RNP_SPI1 | -0.022006 | -0.062440 | 0.015253 | -0.127463 | -0.104074 | -0.097497 | -0.007286 | 0.117184 | -0.037086 | -0.031155 | ... | -0.097801 | -0.067535 | -0.082938 | 0.020084 | -0.055115 | -0.027437 | 0.194901 | 1.000000 | -0.070572 | -0.073515 |
| RNP_ZEB2 | 0.066437 | -0.010590 | 0.016509 | -0.011549 | -0.034953 | -0.092110 | -0.024535 | -0.014147 | 0.032999 | 0.045789 | ... | -0.061431 | -0.089074 | -0.034789 | -0.013748 | -0.060962 | -0.045105 | -0.067507 | -0.070572 | 1.000000 | -0.061035 |
| RNP_ZMYND8 | -0.056477 | 0.182120 | 0.137007 | 0.009134 | 0.146966 | 0.111766 | -0.082777 | 0.082807 | 0.126139 | 0.134117 | ... | 0.123617 | -0.193815 | 0.113884 | 0.015183 | 0.163928 | 0.131409 | 0.018437 | -0.073515 | -0.061035 | 1.000000 |
23 rows × 23 columns